Various small fixes #907

Closed
wants to merge 5 commits
Changes from 2 commits
@@ -57,7 +57,7 @@ class IndexedBamInputFormat extends BAMInputFormat {
override def createRecordReader(split: InputSplit, ctx: TaskAttemptContext): RecordReader[LongWritable, SAMRecordWritable] = {
val rr: RecordReader[LongWritable, SAMRecordWritable] = new BAMFilteredRecordReader()
assert(IndexedBamInputFormat.optViewRegion.isDefined)
BAMFilteredRecordReader.setRegion(IndexedBamInputFormat.optViewRegion.get)
IndexedBamInputFormat.optViewRegion.foreach { (refReg) => BAMFilteredRecordReader.setRegion(refReg) }
rr.initialize(split, ctx)
rr
}
@@ -67,8 +67,8 @@ class ConsensusGeneratorFromKnowns(file: String, @transient sc: SparkContext) ex
val table = indelTable.value

// get region
val start = reads.map(_.record.getStart.toLong).reduce(_ min _)
val end = reads.map(_.getEnd.toLong).reduce(_ max _)
val start = reads.map(_.record.getStart).min
val end = reads.map(_.getEnd).max
val refId = reads.head.record.getContig.getContigName

val region = ReferenceRegion(refId, start, end + 1)
@@ -42,7 +42,7 @@ class ConsensusGeneratorFromSmithWaterman(wMatch: Double,
region: ReferenceRegion): Iterable[RichAlignmentRecord] = {
val rds: Iterable[RichAlignmentRecord] = reads.map(r => {

val sw = new SmithWatermanConstantGapScoring(r.record.getSequence.toString,
val sw = new SmithWatermanConstantGapScoring(r.record.getSequence,
reference,
wMatch,
wMismatch,
@@ -52,7 +52,7 @@ class ConsensusGeneratorFromSmithWaterman(wMatch: Double,

// if we realign with fewer than three alignment blocks, then take the new alignment
if (sw.cigarX.numAlignmentBlocks <= 2) {
val mdTag = MdTag(r.record.getSequence.toString,
val mdTag = MdTag(r.record.getSequence,
reference.drop(sw.xStart),
sw.cigarX,
region.start)
@@ -101,7 +101,7 @@ class AlignmentRecordConverter extends Serializable {
val builder: SAMRecord = new SAMRecord(header.header)

// set canonically necessary fields
builder.setReadName(adamRecord.getReadName.toString)
builder.setReadName(adamRecord.getReadName)
builder.setReadString(adamRecord.getSequence)
adamRecord.getQual match {
case null => builder.setBaseQualityString("*")
@@ -110,18 +110,16 @@

// set read group flags
Option(adamRecord.getRecordGroupName)
.map(_.toString)
.map(rgDict.getSequenceIndex)
.foreach(v => builder.setAttribute("RG", v.toString))
.foreach(v => builder.setAttribute("RG", v))
Option(adamRecord.getRecordGroupLibrary)
.foreach(v => builder.setAttribute("LB", v.toString))
.foreach(v => builder.setAttribute("LB", v))
Option(adamRecord.getRecordGroupPlatformUnit)
.foreach(v => builder.setAttribute("PU", v.toString))
.foreach(v => builder.setAttribute("PU", v))

// set the reference name, and alignment position, for mate
Option(adamRecord.getMateContig)
.map(_.getContigName)
.map(_.toString)
.foreach(builder.setMateReferenceName)
Option(adamRecord.getMateAlignmentStart)
.foreach(s => builder.setMateAlignmentStart(s.toInt + 1))
@@ -161,9 +159,9 @@ class AlignmentRecordConverter extends Serializable {
builder.setReferenceName(adamRecord.getContig.getContigName)

// set the cigar, if provided
Option(adamRecord.getCigar).map(_.toString).foreach(builder.setCigarString)
Option(adamRecord.getCigar).foreach(builder.setCigarString)
// set the old cigar, if provided
Option(adamRecord.getOldCigar).map(_.toString).foreach(v => builder.setAttribute("OC", v))
Option(adamRecord.getOldCigar).foreach(v => builder.setAttribute("OC", v))
// set mapping flags
Option(adamRecord.getReadNegativeStrand)
.foreach(v => builder.setReadNegativeStrandFlag(v.booleanValue))
@@ -184,7 +182,6 @@ class AlignmentRecordConverter extends Serializable {
Option(adamRecord.getFailedVendorQualityChecks)
.foreach(v => builder.setReadFailsVendorQualityCheckFlag(v.booleanValue))
Option(adamRecord.getMismatchingPositions)
.map(_.toString)
.foreach(builder.setAttribute("MD", _))

// add all other tags
@@ -195,7 +192,7 @@ class AlignmentRecordConverter extends Serializable {
})
}

// return sam record
// return sam record
builder
}

@@ -34,21 +34,21 @@ private[adam] object FastaConverter {
val (contigName, contigDescription) = parseDescriptionLine(descriptionLine, fileIndex)

private def parseDescriptionLine(descriptionLine: Option[String], id: Long): (Option[String], Option[String]) = {
if (descriptionLine.isEmpty) {
descriptionLine.fold {
assert(id == -1L, "Cannot have a headerless line in a file with more than one fragment.")
(None, None)
} else {
val splitIndex = descriptionLine.get.indexOf(' ')
(None: Option[String], None: Option[String])
} { (dL) =>
val splitIndex = dL.indexOf(' ')
if (splitIndex >= 0) {
val split = descriptionLine.get.splitAt(splitIndex)
val split = dL.splitAt(splitIndex)

val contigName: String = split._1.stripPrefix(">").trim
val contigDescription: String = split._2.trim

(Some(contigName), Some(contigDescription))

} else {
(Some(descriptionLine.get.stripPrefix(">").trim), None)
(Some(dL.stripPrefix(">").trim), None)
}
}
}
@@ -188,7 +188,7 @@ private[converters] class FastaConverter(fragmentLength: Long) extends Serializa
description: Option[String]): Seq[NucleotideContigFragment] = {

// get sequence length
val sequenceLength = sequence.map(_.length).reduce(_ + _)
val sequenceLength = sequence.map(_.length).sum

// map sequences into fragments
val sequencesAsFragments = mapFragments(sequence)
@@ -147,17 +147,17 @@ class FastqRecordConverter extends Serializable with Logging {
val readSequence = lines(1)

if (stringency == ValidationStringency.STRICT && lines(3) == "*" && readSequence.length > 1)
throw new Exception(s"Fastq quality must be defined")
throw new Exception("Fastq quality must be defined")
else if (stringency == ValidationStringency.STRICT && lines(3).length != readSequence.length)
throw new Exception(s"Fastq sequence and quality strings must have the same length")
throw new Exception("Fastq sequence and quality strings must have the same length")

val readQualities =
if (lines(3) == "*")
"B" * readSequence.length
else if (lines(3).length < lines(1).length)
lines(3) + ("B" * (lines(1).length - lines(3).length))
else if (lines(3).length > lines(1).length)
throw new Exception(s"Not implemented")
throw new Exception("Not implemented")
else
lines(3)

@@ -31,7 +31,7 @@ private[adam] class GenotypesToVariantsConverter(validateSamples: Boolean = fals
*/
def rms(values: Seq[Double]): Double = {
if (values.length > 0) {
sqrt(values.map(pow(_, 2.0)).reduce(_ + _) / values.length.toDouble)
sqrt(values.map(pow(_, 2.0)).sum / values.length.toDouble)
} else {
0.0
}
@@ -67,5 +67,5 @@ private[adam] class GenotypesToVariantsConverter(validateSamples: Boolean = fals
* @param values An array of non-phred scaled genotype quality scores.
* @return A non-phred scaled variant likelihood.
*/
def variantQualityFromGenotypes(values: Seq[Double]): Double = 1.0 - values.reduce(_ * _)
def variantQualityFromGenotypes(values: Seq[Double]): Double = 1.0 - values.product
}
@@ -79,12 +79,14 @@ class SAMRecordConverter extends Serializable with Logging {
// This prevents looking up a -1 in the sequence dictionary
val readReference: Int = samRecord.getReferenceIndex
if (readReference != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
builder.setContig(SequenceRecord.toADAMContig(dict(samRecord.getReferenceName).get))
dict(samRecord.getReferenceName).foreach { (rec) =>
builder.setContig(SequenceRecord.toADAMContig(rec))
}

// set read alignment flag
val start: Int = samRecord.getAlignmentStart
assert(start != 0, "Start cannot equal 0 if contig is set.")
builder.setStart((start - 1).asInstanceOf[Long])
builder.setStart((start - 1))

// set OP and OC flags, if applicable
if (samRecord.getAttribute("OP") != null) {
@@ -127,7 +129,9 @@ class SAMRecordConverter extends Serializable with Logging {
val mateReference: Int = samRecord.getMateReferenceIndex

if (mateReference != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
builder.setMateContig(SequenceRecord.toADAMContig(dict(samRecord.getMateReferenceName).get))
dict(samRecord.getMateReferenceName).foreach { (rec) =>
builder.setMateContig(SequenceRecord.toADAMContig(rec))
}

val mateStart = samRecord.getMateAlignmentStart
if (mateStart > 0) {
@@ -113,7 +113,7 @@ object VariantAnnotationConverter extends Serializable {
private def createFieldMap(keys: Seq[AttrKey], schema: Schema): Map[String, (Int, Object => Object)] = {
keys.filter(_.attrConverter != null).map(field => {
val avroField = schema.getField(field.adamKey)
field.vcfKey -> (avroField.pos, field.attrConverter)
field.vcfKey -> ((avroField.pos, field.attrConverter))
Member:

sorry, what does this do?

Contributor Author:

The compiler is in this case auto-tupling and inserting the parentheses that I'm here making more explicit. See e.g. http://stackoverflow.com/a/29252334/47978

It's an ill-documented and now kind of deprecated feature. (A minimal standalone sketch of this behavior appears after this hunk.)

Member:

thanks! yeah, that shouldn't have worked before :)

})(collection.breakOut)
}
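To illustrate the auto-tupling discussed in the review thread above, here is a minimal, hypothetical Scala sketch; the object and value names are made up and nothing in it comes from this PR or the ADAM codebase.

```scala
// Hypothetical sketch of Scala's argument "auto-tupling"; not part of this PR.
object AutoTuplingSketch {
  def main(args: Array[String]): Unit = {
    val key = "AF"
    val pos = 3

    // `->` (via ArrowAssoc) takes exactly one argument. Written with a single
    // pair of parentheses, the parser sees *two* arguments to `->`, and the
    // compiler silently adapts them into a tuple (auto-tupling). Under lint
    // flags this produces an "adapted the argument list" style warning.
    val adapted = key -> (pos, key.length)

    // The extra pair of parentheses builds the tuple explicitly, so no
    // adaptation is needed; the resulting value is identical.
    val explicit = key -> ((pos, key.length))

    // Both are ("AF", (3, 2)).
    assert(adapted == explicit)
    println(adapted)
  }
}
```

As far as I recall, `-Xlint` on 2.x compilers enables the adapted-args check, so the single-parenthesis form warns that the argument list was adapted into a tuple; spelling the tuple out, as the diff does, avoids that adaptation entirely.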

@@ -50,7 +50,7 @@ object VariantContextConverter {
if (allele == null)
Seq()
else
Seq(Allele.create(allele.toString, isRef))
Seq(Allele.create(allele, isRef))
}

private def convertAlleles(v: Variant): java.util.Collection[Allele] = {
@@ -62,8 +62,8 @@
if (alleles == null) return Collections.emptyList[Allele]
else g.getAlleles.map {
case GenotypeAllele.NoCall => Allele.NO_CALL
case GenotypeAllele.Ref | GenotypeAllele.OtherAlt => Allele.create(g.getVariant.getReferenceAllele.toString, true)
case GenotypeAllele.Alt => Allele.create(g.getVariant.getAlternateAllele.toString)
case GenotypeAllele.Ref | GenotypeAllele.OtherAlt => Allele.create(g.getVariant.getReferenceAllele, true)
case GenotypeAllele.Alt => Allele.create(g.getVariant.getAlternateAllele)
}
}

@@ -322,8 +322,8 @@ class VariantContextConverter(dict: Option[SequenceDictionary] = None) extends S
def convert(vc: ADAMVariantContext): BroadVariantContext = {
val variant: Variant = vc.variant
val vcb = new VariantContextBuilder()
.chr(refSeqToContig.getOrElse(variant.getContig.getContigName.toString,
variant.getContig.getContigName.toString))
.chr(refSeqToContig.getOrElse(variant.getContig.getContigName,
variant.getContig.getContigName))
.start(variant.getStart + 1 /* Recall ADAM is 0-indexed */ )
.stop(variant.getStart + variant.getReferenceAllele.length)
.alleles(VariantContextConverter.convertAlleles(variant))
@@ -334,7 +334,7 @@ class VariantContextConverter(dict: Option[SequenceDictionary] = None) extends S
try {
vcb.genotypes(vc.genotypes.map(g => {
val gb = new htsjdk.variant.variantcontext.GenotypeBuilder(
g.getSampleId.toString, VariantContextConverter.convertAlleles(g))
g.getSampleId, VariantContextConverter.convertAlleles(g))

Option(g.getIsPhased).foreach(gb.phased(_))
Option(g.getGenotypeQuality).foreach(gb.GQ(_))
@@ -346,7 +346,7 @@ class VariantContextConverter(dict: Option[SequenceDictionary] = None) extends S
if (g.getVariantCallingAnnotations != null) {
val callAnnotations = g.getVariantCallingAnnotations()
if (callAnnotations.getVariantFilters != null)
gb.filters(callAnnotations.getVariantFilters.map(_.toString))
gb.filters(callAnnotations.getVariantFilters)
}

if (g.getGenotypeLikelihoods != null && !g.getGenotypeLikelihoods.isEmpty)
@@ -92,10 +92,10 @@ case class Transcript(id: String,
* @return the String representation of this Transcript's spliced mRNA sequence
*/
def extractTranscribedRNASequence(referenceSequence: String): String = {
val minStart = exons.map(_.region.start).toSeq.sorted.head.toInt
val minStart = exons.map(_.region.start).toSeq.min.toInt
// takes the max...

val maxEnd = -exons.map(-_.region.end).toSeq.sorted.head.toInt
val maxEnd = -exons.map(-_.region.end).toSeq.min.toInt
if (strand)
referenceSequence.substring(minStart, maxEnd)
else
@@ -231,4 +231,3 @@ object ReferenceUtils {
refs.toSeq.sorted.foldLeft(Seq[ReferenceRegion]())(folder)
}
}

@@ -67,7 +67,7 @@ object IndelTable {
def apply(variants: RDD[Variant]): IndelTable = {
val consensus: Map[String, Iterable[Consensus]] = variants.filter(v => v.getReferenceAllele.length != v.getAlternateAllele.length)
.map(v => {
val referenceName = v.getContig.getContigName.toString
val referenceName = v.getContig.getContigName
val consensus = if (v.getReferenceAllele.length > v.getAlternateAllele.length) {
// deletion
val deletionLength = v.getReferenceAllele.length - v.getAlternateAllele.length
@@ -77,7 +77,7 @@ object IndelTable {
} else {
val start = v.getStart + v.getReferenceAllele.length

Consensus(v.getAlternateAllele.toString.drop(v.getReferenceAllele.length), ReferenceRegion(referenceName, start, start + 1))
Consensus(v.getAlternateAllele.drop(v.getReferenceAllele.length), ReferenceRegion(referenceName, start, start + 1))
}

(referenceName, consensus)
@@ -27,10 +27,10 @@ object ProgramRecord {
val id: String = pr.getId

// these fields are optional and can be left null, so must check for null...
val commandLine: Option[String] = Option(pr.getCommandLine).map(_.toString)
val name: Option[String] = Option(pr.getProgramName).map(_.toString)
val version: Option[String] = Option(pr.getProgramVersion).map(_.toString)
val previousID: Option[String] = Option(pr.getPreviousProgramGroupId).map(_.toString)
val commandLine: Option[String] = Option(pr.getCommandLine)
val name: Option[String] = Option(pr.getProgramName)
val version: Option[String] = Option(pr.getProgramVersion)
val previousID: Option[String] = Option(pr.getPreviousProgramGroupId)

new ProgramRecord(id, commandLine, name, version, previousID)
}
@@ -54,4 +54,3 @@ case class ProgramRecord(id: String,
pr
}
}

@@ -49,7 +49,7 @@ object ReferencePosition extends Serializable {
* @see fivePrime
*/
def apply(record: AlignmentRecord): ReferencePosition = {
new ReferencePosition(record.getContig.getContigName.toString, record.getStart)
new ReferencePosition(record.getContig.getContigName, record.getStart)
}

/**
@@ -39,7 +39,7 @@ class SAMFileHeaderWritable(@transient hdr: SAMFileHeader) extends Serializable
}
protected val comments = {
val cmts: List[java.lang.String] = hdr.getComments
cmts.flatMap(Option(_)).map(_.toString) // don't trust samtools to return non-nulls
cmts.flatMap(Option(_)) // don't trust samtools to return non-nulls
}
protected val rgs = RecordGroupDictionary.fromSAMHeader(hdr)

@@ -85,7 +85,7 @@ class SequenceDictionary(val records: Vector[SequenceRecord]) extends Serializab
def isCompatibleWith(that: SequenceDictionary): Boolean = {
for (record <- that.records) {
val myRecord = byName.get(record.name)
if (myRecord.isDefined && myRecord.get != record)
if (myRecord.exists(_ != record))
return false
}
true
@@ -171,7 +171,7 @@ case class SequenceRecord(
* @return A SAM formatted sequence record.
*/
def toSAMSequenceRecord: SAMSequenceRecord = {
val rec = new SAMSequenceRecord(name.toString, length.toInt)
val rec = new SAMSequenceRecord(name, length.toInt)

// set md5 if available
md5.foreach(s => rec.setAttribute(SAMSequenceRecord.MD5_TAG, s.toUpperCase))
@@ -226,12 +226,12 @@ object SequenceRecord {
new SequenceRecord(
name,
length,
Option(url).map(_.toString),
Option(md5).map(_.toString),
Option(refseq).map(_.toString),
Option(genbank).map(_.toString),
Option(assembly).map(_.toString),
Option(species).map(_.toString),
Option(url),
Option(md5),
Option(refseq),
Option(genbank),
Option(assembly),
Option(species),
referenceIndex
)
}
@@ -258,8 +258,8 @@ object SequenceRecord {
}
def toSAMSequenceRecord(record: SequenceRecord): SAMSequenceRecord = {
val sam = new SAMSequenceRecord(record.name, record.length.toInt)
record.md5.foreach(v => sam.setAttribute(SAMSequenceRecord.MD5_TAG, v.toString))
record.url.foreach(v => sam.setAttribute(SAMSequenceRecord.URI_TAG, v.toString))
record.md5.foreach(v => sam.setAttribute(SAMSequenceRecord.MD5_TAG, v))
record.url.foreach(v => sam.setAttribute(SAMSequenceRecord.URI_TAG, v))
sam
}
