Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge VariantAnnotation and DatabaseVariantAnnotation records #1250

Closed
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/
package org.bdgenomics.adam.converters

import com.google.common.base.Splitter
import com.google.common.collect.ImmutableList
import htsjdk.variant.variantcontext.{
Allele,
Expand Down Expand Up @@ -353,14 +354,21 @@ private[adam] class VariantContextConverter(dict: Option[SequenceDictionary] = N
* @return Returns an Avro description of the genotyped site.
*/
private def createADAMVariant(vc: HtsjdkVariantContext, alt: Option[String]): Variant = {
// VCF CHROM, POS, ID, REF and ALT
// VCF CHROM, POS, ID, REF, FORMAT, and ALT
val builder = Variant.newBuilder
.setContigName(createContig(vc))
.setStart(vc.getStart - 1 /* ADAM is 0-indexed */ )
.setEnd(vc.getEnd /* ADAM is 0-indexed, so the 1-indexed inclusive end becomes exclusive */ )
.setReferenceAllele(vc.getReference.getBaseString)
alt.foreach(builder.setAlternateAllele(_))
splitIds(vc).foreach(builder.setNames(_))
builder.setFiltersApplied(vc.filtersWereApplied)
if (vc.filtersWereApplied) {
builder.setFiltersPassed(!vc.isFiltered)
}
if (vc.isFiltered) {
builder.setFiltersFailed(new java.util.ArrayList(vc.getFilters));
}
builder.build
}

Expand Down Expand Up @@ -418,11 +426,22 @@ private[adam] class VariantContextConverter(dict: Option[SequenceDictionary] = N
.setContigName(contigName)
.setStart(start)
.setEnd(end)
.setVariantCallingAnnotations(annotations)
.setSampleId(g.getSampleName)
.setAlleles(g.getAlleles.map(VariantContextConverter.convertAllele(vc, _)))
.setPhased(g.isPhased)

// copy variant calling annotations to update filter attributes
// (because the htsjdk Genotype is not available when build is called upstream)
val copy = VariantCallingAnnotations.newBuilder(annotations)
// htsjdk does not provide a field filtersWereApplied for genotype as it does in VariantContext
// we might be able to calculate it by querying the FT FORMAT field value directly
copy.setFiltersApplied(true)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this would work:

g.getAnyAttribute("FT") != null

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we create an issue to track the upstream htsjdk issue?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Created new issue #1269

copy.setFiltersPassed(!g.isFiltered)
if (g.isFiltered) {
copy.setFiltersFailed(Splitter.on(";").splitToList(g.getFilters))
}
genotype.setVariantCallingAnnotations(copy.build())

if (g.hasGQ) genotype.setGenotypeQuality(g.getGQ)
if (g.hasDP) genotype.setReadDepth(g.getDP)

Expand Down Expand Up @@ -522,13 +541,6 @@ private[adam] class VariantContextConverter(dict: Option[SequenceDictionary] = N
private def extractVariantCallingAnnotations(vc: HtsjdkVariantContext): VariantCallingAnnotations = {
val call: VariantCallingAnnotations.Builder = VariantCallingAnnotations.newBuilder

// VCF QUAL, FILTER and INFO fields
if (vc.filtersWereApplied && vc.isFiltered) {
call.setVariantIsPassing(false).setVariantFilters(new java.util.ArrayList(vc.getFilters))
} else if (vc.filtersWereApplied) {
call.setVariantIsPassing(true)
}

VariantAnnotationConverter.convert(vc, call.build())
}

Expand Down Expand Up @@ -588,8 +600,8 @@ private[adam] class VariantContextConverter(dict: Option[SequenceDictionary] = N

if (g.getVariantCallingAnnotations != null) {
val callAnnotations = g.getVariantCallingAnnotations()
if (callAnnotations.getVariantFilters != null) {
gb.filters(callAnnotations.getVariantFilters)
if (callAnnotations.getFiltersPassed() != null && !callAnnotations.getFiltersPassed()) {
gb.filters(callAnnotations.getFiltersFailed())
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ object VariantContext {
optAnn: Option[VariantAnnotation]): VariantContext = {

// if the join yielded one or fewer annotation, pick what we've got. else, merge.
val ann = (v.databases, optAnn) match {
val ann = (v.annotations, optAnn) match {
case (None, a) => a
case (a, None) => a
case (Some(a), Some(b)) => Some(mergeAnnotations(a, b))
Expand Down Expand Up @@ -143,5 +143,5 @@ class VariantContext(
val position: ReferencePosition,
val variant: RichVariant,
val genotypes: Iterable[Genotype],
val databases: Option[VariantAnnotation] = None) {
val annotations: Option[VariantAnnotation] = None) {
}
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ case class VariantContextRDD(rdd: RDD[VariantContext],
* annotations attached to this VariantContextRDD.
*/
def toVariantAnnotationRDD: VariantAnnotationRDD = {
VariantAnnotationRDD(rdd.flatMap(_.databases), sequences)
VariantAnnotationRDD(rdd.flatMap(_.annotations), sequences)
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,29 +136,70 @@ class VariantContextConverterSuite extends ADAMFunSuite {
assert(adamGT.getPhaseQuality === 50)
}

test("Convert htsjdk SNV with different filters to ADAM") {
test("Convert htsjdk SNV with different variant filters to ADAM") {
val vcb = htsjdkSNVBuilder
vcb.genotypes(GenotypeBuilder.create("NA12878", vcb.getAlleles))

val converter = new VariantContextConverter

{ // No filters
val adamVCs = converter.convert(vcb.make)
val adamGT = adamVCs.flatMap(_.genotypes).head
assert(adamGT.getVariantCallingAnnotations.getVariantIsPassing === null)
val adamVariant = adamVCs.map(_.variant).head
assert(adamVariant.getFiltersApplied === false)
assert(adamVariant.getFiltersPassed === null)
assert(adamVariant.getFiltersFailed.isEmpty)
}
{ // PASSing
vcb.unfiltered.passFilters
val adamVCs = converter.convert(vcb.make)
val adamGT = adamVCs.flatMap(_.genotypes).head
assert(adamGT.getVariantCallingAnnotations.getVariantIsPassing)
val adamVariant = adamVCs.map(_.variant).head
assert(adamVariant.getFiltersApplied === true)
assert(adamVariant.getFiltersPassed === true)
assert(adamVariant.getFiltersFailed.isEmpty)
}
{ // not PASSing
vcb.unfiltered.filter("LowMQ")
val adamVCs = converter.convert(vcb.make)
val adamVariant = adamVCs.map(_.variant).head
assert(adamVariant.getFiltersApplied === true)
assert(adamVariant.getFiltersPassed === false)
assert(adamVariant.getFiltersFailed.sameElements(List("LowMQ")))
}
}

test("Convert htsjdk SNV with different genotype filters to ADAM") {
val vcb = htsjdkSNVBuilder
val gb = new GenotypeBuilder("NA12878", vcb.getAlleles)

val converter = new VariantContextConverter

{ // No filters
gb.unfiltered
vcb.genotypes(gb.make)
val adamVCs = converter.convert(vcb.make)
val adamGT = adamVCs.flatMap(_.genotypes).head
// htsjdk does not distinguish between filters not applied and filters passed in Genotype
assert(adamGT.getVariantCallingAnnotations.getFiltersApplied === true)
assert(adamGT.getVariantCallingAnnotations.getFiltersPassed === true)
assert(adamGT.getVariantCallingAnnotations.getFiltersFailed.isEmpty)
}
{ // PASSing
gb.filter("PASS")
vcb.genotypes(gb.make)
val adamVCs = converter.convert(vcb.make)
val adamGT = adamVCs.flatMap(_.genotypes).head
assert(adamGT.getVariantCallingAnnotations.getFiltersApplied === true)
assert(adamGT.getVariantCallingAnnotations.getFiltersPassed === true)
assert(adamGT.getVariantCallingAnnotations.getFiltersFailed.isEmpty)
}
{ // not PASSing
gb.filter("LowMQ")
vcb.genotypes(gb.make)
val adamVCs = converter.convert(vcb.make)
val adamGT = adamVCs.flatMap(_.genotypes).head
assert(adamGT.getVariantCallingAnnotations.getVariantIsPassing === false)
assert(adamGT.getVariantCallingAnnotations.getVariantFilters.sameElements(List("LowMQ")))
assert(adamGT.getVariantCallingAnnotations.getFiltersApplied === true)
assert(adamGT.getVariantCallingAnnotations.getFiltersPassed === false)
assert(adamGT.getVariantCallingAnnotations.getFiltersFailed.sameElements(List("LowMQ")))
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ class VariantContextRDDSuite extends ADAMFunSuite {
a0)), sd)

val annotated = vc.joinVariantAnnotations(vda).rdd
assert(annotated.map(_.databases.isDefined).reduce { (a, b) => a && b })
assert(annotated.map(_.annotations.isDefined).reduce { (a, b) => a && b })
}

sparkTest("don't lose any variants when piping as VCF") {
Expand Down