diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecord.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecord.java index c19ed8e3e9c..3f3258d6161 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecord.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecord.java @@ -10,6 +10,7 @@ import htsjdk.variant.vcf.VCFConstants; import org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants; +import org.broadinstitute.hellbender.tools.walkers.sv.SVSegment; import org.broadinstitute.hellbender.utils.IntervalUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; @@ -34,7 +35,8 @@ public class SVCallRecord implements SVLocatable { GATKSVVCFConstants.END2_ATTRIBUTE, GATKSVVCFConstants.STRANDS_ATTRIBUTE, GATKSVVCFConstants.SVTYPE, - GATKSVVCFConstants.CPX_TYPE + GATKSVVCFConstants.CPX_TYPE, + GATKSVVCFConstants.CPX_INTERVALS ); private final String id; @@ -57,6 +59,7 @@ public class SVCallRecord implements SVLocatable { // CPX related fields private final GATKSVVCFConstants.ComplexVariantSubtype cpxSubtype; + private final List cpxIntervals; public SVCallRecord(final String id, final String contigA, @@ -67,6 +70,7 @@ public SVCallRecord(final String id, final Boolean strandB, final GATKSVVCFConstants.StructuralVariantAnnotationType type, final GATKSVVCFConstants.ComplexVariantSubtype cpxSubtype, + final List cpxIntervals, final Integer length, final List algorithms, final List alleles, @@ -75,7 +79,7 @@ public SVCallRecord(final String id, final Set filters, final Double log10PError, final SAMSequenceDictionary dictionary) { - this(id, contigA, positionA, strandA, contigB, positionB, strandB, type, cpxSubtype, length, algorithms, alleles, genotypes, attributes, filters, log10PError); + this(id, contigA, positionA, strandA, contigB, positionB, strandB, type, cpxSubtype, cpxIntervals, length, algorithms, alleles, genotypes, attributes, filters, log10PError); validateCoordinates(dictionary); } @@ -88,6 +92,7 @@ protected SVCallRecord(final String id, final Boolean strandB, final GATKSVVCFConstants.StructuralVariantAnnotationType type, final GATKSVVCFConstants.ComplexVariantSubtype cpxSubtype, + final List cpxIntervals, final Integer length, final List algorithms, final List alleles, @@ -100,6 +105,7 @@ protected SVCallRecord(final String id, Utils.nonNull(genotypes); Utils.nonNull(attributes); Utils.nonNull(filters); + Utils.nonNull(cpxIntervals); this.id = Utils.nonNull(id); this.contigA = contigA; this.positionA = positionA; @@ -107,6 +113,7 @@ protected SVCallRecord(final String id, this.positionB = positionB; this.type = Utils.nonNull(type); this.cpxSubtype = cpxSubtype; + this.cpxIntervals = canonicalizeComplexEventList(cpxIntervals); this.algorithms = Collections.unmodifiableList(algorithms); this.alleles = Collections.unmodifiableList(alleles); this.altAlleles = alleles.stream().filter(allele -> !allele.isNoCall() && !allele.isReference()).collect(Collectors.toList()); @@ -135,11 +142,26 @@ private void validateCoordinates(final SAMSequenceDictionary dictionary) { // CPX types may have position B precede A, such as dispersed duplications where A is the insertion point and // B references the source sequence. if (type != GATKSVVCFConstants.StructuralVariantAnnotationType.CPX) { - Utils.validateArg(IntervalUtils.compareLocatables(getPositionAInterval(), getPositionBInterval(), dictionary) <= 0, - "End coordinate cannot precede start"); + if (IntervalUtils.compareLocatables(getPositionAInterval(), getPositionBInterval(), dictionary) > 0) { + throw new IllegalArgumentException("End precedes start in variant " + id); + } + } + for (final ComplexEventInterval interval : cpxIntervals) { + Utils.nonNull(interval); + validatePosition(interval.getContig(), interval.getStart(), dictionary); + validatePosition(interval.getContig(), interval.getEnd(), dictionary); } } + /** + * Sorts complex intervals list so that they can be efficiently compared across records. + * @param intervals complex intervals + * @return canonicalized list + */ + private static List canonicalizeComplexEventList(final List intervals) { + return intervals.stream().sorted(Comparator.comparing(ComplexEventInterval::encode)).collect(Collectors.toList()); + } + private static void validatePosition(final String contig, final int position, final SAMSequenceDictionary dictionary) { final SAMSequenceRecord seq = dictionary.getSequence(contig); Utils.validateArg(seq != null, "Contig " + contig + " not found in dictionary"); @@ -148,7 +170,7 @@ private static void validatePosition(final String contig, final int position, fi private static Map validateAttributes(final Map attributes) { for (final String key : INVALID_ATTRIBUTES) { - Utils.validateArg(!attributes.containsKey(key), "Attempted to create record with invalid key: " + key); + Utils.validateArg(!attributes.containsKey(key), "Attempted to create record with reserved key: " + key); } return attributes; } @@ -180,6 +202,7 @@ private static Integer inferLength(final GATKSVVCFConstants.StructuralVariantAnn || type == GATKSVVCFConstants.StructuralVariantAnnotationType.CTX) && inputLength != null) { throw new IllegalArgumentException("Input length should be null for type " + type.name() + " but found " + inputLength); } + // TODO complex subtypes should be checked and handled properly, but for now we just pass through SVLEN return inputLength; } } @@ -384,4 +407,45 @@ public Double getLog10PError() { return log10PError; } + public List getComplexEventIntervals() { + return cpxIntervals; + } + + public static final class ComplexEventInterval extends SVSegment { + + public ComplexEventInterval(final GATKSVVCFConstants.StructuralVariantAnnotationType intervalType, + final SimpleInterval interval) { + super(intervalType, interval); + } + + public static ComplexEventInterval decode(final String str, final SAMSequenceDictionary dictionary) { + Utils.nonNull(str); + final String[] tokens = str.split("_", 2); + if (tokens.length < 2) { + throw new IllegalArgumentException("Expected complex interval with format \"SVTYPE_chr:pos-end\" but found \"" + str + "\""); + } + final SimpleInterval interval = new SimpleInterval(tokens[1]); + if (!IntervalUtils.intervalIsOnDictionaryContig(interval, dictionary)) { + throw new IllegalArgumentException("Invalid CPX interval: " + interval); + } + return new ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.valueOf(tokens[0]), interval); + } + + public String encode() { + return getIntervalSVType().name() + "_" + getInterval().toString(); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ComplexEventInterval that = (ComplexEventInterval) o; + return getIntervalSVType() == that.getIntervalSVType() && Objects.equals(getInterval(), that.getInterval()); + } + + @Override + public int hashCode() { + return Objects.hash(getIntervalSVType(), getInterval()); + } + } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtils.java index 2f246a3ea5a..cf31d654727 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtils.java @@ -24,8 +24,7 @@ public final class SVCallRecordUtils { private static final Set VALID_TYPES = new HashSet<>(Arrays.asList(GATKSVVCFConstants.StructuralVariantAnnotationType.values()).stream() .map(GATKSVVCFConstants.StructuralVariantAnnotationType::name).collect(Collectors.toList())); - private static final Set VALID_CPX_SUBTYPES = new HashSet<>(Arrays.asList(GATKSVVCFConstants.ComplexVariantSubtype.values()).stream() - .map(GATKSVVCFConstants.ComplexVariantSubtype::name).collect(Collectors.toList())); + private static final Set VALID_CPX_SUBTYPES = GATKSVVCFConstants.COMPLEX_VARIANT_SUBTYPE_MAP.keySet(); /** * Create a builder for a variant from an {@link SVCallRecord} for VCF interoperability @@ -34,34 +33,18 @@ public final class SVCallRecordUtils { */ public static VariantContextBuilder getVariantBuilder(final SVCallRecord record) { Utils.nonNull(record); - final int end; final GATKSVVCFConstants.StructuralVariantAnnotationType type = record.getType(); - final GATKSVVCFConstants.ComplexVariantSubtype cpxType = record.getComplexSubtype(); - final boolean isDispersedDup = cpxType == GATKSVVCFConstants.ComplexVariantSubtype.dDUP - || cpxType == GATKSVVCFConstants.ComplexVariantSubtype.dDUP_iDEL; - if (type == GATKSVVCFConstants.StructuralVariantAnnotationType.INS - || type == GATKSVVCFConstants.StructuralVariantAnnotationType.BND - || type == GATKSVVCFConstants.StructuralVariantAnnotationType.CTX - || isDispersedDup) { - end = record.getPositionA(); - } else { - end = record.getPositionB(); - } + final int end; final Integer end2; final String chr2; if (type == GATKSVVCFConstants.StructuralVariantAnnotationType.BND - || type == GATKSVVCFConstants.StructuralVariantAnnotationType.CTX) { + || type == GATKSVVCFConstants.StructuralVariantAnnotationType.CTX) { + // TODO this may need to be modified in the future to handle complex translocations + end = record.getPositionA(); end2 = record.getPositionB(); chr2 = record.getContigB(); - } else if (type == GATKSVVCFConstants.StructuralVariantAnnotationType.CPX) { - if (isDispersedDup) { - end2 = record.getPositionB(); - chr2 = record.getContigB(); - } else { - end2 = null; - chr2 = null; - } } else { + end = record.getPositionB(); end2 = null; chr2 = null; } @@ -90,14 +73,21 @@ public static VariantContextBuilder getVariantBuilder(final SVCallRecord record) builder.attribute(GATKSVVCFConstants.END2_ATTRIBUTE, end2); builder.attribute(GATKSVVCFConstants.CONTIG2_ATTRIBUTE, chr2); } + final GATKSVVCFConstants.ComplexVariantSubtype cpxType = record.getComplexSubtype(); if (cpxType != null) { builder.attribute(GATKSVVCFConstants.CPX_TYPE, getComplexSubtypeString(cpxType)); } + final List cpxIntervals = record.getComplexEventIntervals(); + if (!cpxIntervals.isEmpty()) { + builder.attribute(GATKSVVCFConstants.CPX_INTERVALS, cpxIntervals.stream().map(SVCallRecord.ComplexEventInterval::encode).collect(Collectors.toList())); + } builder.attribute(GATKSVVCFConstants.SVLEN, record.getLength()); if ((svtype == GATKSVVCFConstants.StructuralVariantAnnotationType.BND || svtype == GATKSVVCFConstants.StructuralVariantAnnotationType.INV - || svtype == GATKSVVCFConstants.StructuralVariantAnnotationType.INS) + || svtype == GATKSVVCFConstants.StructuralVariantAnnotationType.INS + || svtype == GATKSVVCFConstants.StructuralVariantAnnotationType.CPX + || svtype == GATKSVVCFConstants.StructuralVariantAnnotationType.CTX) && record.getStrandA() != null && record.getStrandB() != null) { builder.attribute(GATKSVVCFConstants.STRANDS_ATTRIBUTE, getStrandString(record)); } @@ -183,12 +173,12 @@ public static GenotypesContext populateGenotypesForMissingSamplesWithAlleles(fin */ public static SVCallRecord copyCallWithNewGenotypes(final SVCallRecord record, final GenotypesContext genotypes) { return new SVCallRecord(record.getId(), record.getContigA(), record.getPositionA(), record.getStrandA(), record.getContigB(), - record.getPositionB(), record.getStrandB(), record.getType(), record.getComplexSubtype(), record.getLength(), record.getAlgorithms(), record.getAlleles(), + record.getPositionB(), record.getStrandB(), record.getType(), record.getComplexSubtype(), record.getComplexEventIntervals(), record.getLength(), record.getAlgorithms(), record.getAlleles(), genotypes, record.getAttributes(), record.getFilters(), record.getLog10PError()); } public static SVCallRecord copyCallWithNewAttributes(final SVCallRecord record, final Map attr) { return new SVCallRecord(record.getId(), record.getContigA(), record.getPositionA(), record.getStrandA(), record.getContigB(), - record.getPositionB(), record.getStrandB(), record.getType(), record.getComplexSubtype(), record.getLength(), record.getAlgorithms(), record.getAlleles(), + record.getPositionB(), record.getStrandB(), record.getType(), record.getComplexSubtype(), record.getComplexEventIntervals(), record.getLength(), record.getAlgorithms(), record.getAlleles(), record.getGenotypes(), attr, record.getFilters(), record.getLog10PError()); } @@ -300,20 +290,19 @@ public static Stream convertInversionsToBreakends(final SVCallReco } Utils.validateArg(record.isIntrachromosomal(), "Inversion " + record.getId() + " is not intrachromosomal"); final SVCallRecord positiveBreakend = new SVCallRecord(record.getId(), record.getContigA(), - record.getPositionA(), true, record.getContigB(), record.getPositionB(), true, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null,null, + record.getPositionA(), true, record.getContigB(), record.getPositionB(), true, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null,record.getComplexEventIntervals(), null, record.getAlgorithms(), record.getAlleles(), record.getGenotypes(), record.getAttributes(), record.getFilters(), record.getLog10PError(), dictionary); final SVCallRecord negativeBreakend = new SVCallRecord(record.getId(), record.getContigA(), - record.getPositionA(), false, record.getContigB(), record.getPositionB(), false, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null,null, + record.getPositionA(), false, record.getContigB(), record.getPositionB(), false, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null,record.getComplexEventIntervals(), null, record.getAlgorithms(), record.getAlleles(), record.getGenotypes(), record.getAttributes(), record.getFilters(), record.getLog10PError(), dictionary); return Stream.of(positiveBreakend, negativeBreakend); } /** * Creates a new {@link SVCallRecord} from the given {@link VariantContext}, keeping any variant fields. - * @see SVCallRecordUtils#create(VariantContext, boolean) */ - public static SVCallRecord create(final VariantContext variant) { - return create(variant, true); + public static SVCallRecord create(final VariantContext variant, final SAMSequenceDictionary dictionary) { + return create(variant, true, dictionary); } /** @@ -322,7 +311,7 @@ public static SVCallRecord create(final VariantContext variant) { * @param keepVariantAttributes retain variant attribute fields * @return converted record */ - public static SVCallRecord create(final VariantContext variant, boolean keepVariantAttributes) { + public static SVCallRecord create(final VariantContext variant, boolean keepVariantAttributes, final SAMSequenceDictionary dictionary) { Utils.nonNull(variant); final String id = variant.getID(); final String contigA = variant.getContig(); @@ -330,6 +319,7 @@ public static SVCallRecord create(final VariantContext variant, boolean keepVari final GATKSVVCFConstants.StructuralVariantAnnotationType type = inferStructuralVariantType(variant); final GATKSVVCFConstants.ComplexVariantSubtype cpxSubtype = getComplexSubtype(variant); + final List cpxIntervals = parseComplexIntervals(variant.getAttributeAsStringList(GATKSVVCFConstants.CPX_INTERVALS, null), dictionary); final List algorithms = getAlgorithms(variant); final String strands; @@ -367,21 +357,11 @@ public static SVCallRecord create(final VariantContext variant, boolean keepVari || type == GATKSVVCFConstants.StructuralVariantAnnotationType.CTX) { if (!(hasContig2 && hasEnd2)) { throw new UserException.BadInput("Attributes " + GATKSVVCFConstants.END2_ATTRIBUTE + - " and " + GATKSVVCFConstants.CONTIG2_ATTRIBUTE + " are required for BND records (variant " + - variant.getID() + ")."); + " and " + GATKSVVCFConstants.CONTIG2_ATTRIBUTE + " are required for BND and CTX records " + + "(variant " + variant.getID() + ")."); } contigB = variant.getAttributeAsString(GATKSVVCFConstants.CONTIG2_ATTRIBUTE, null); positionB = variant.getAttributeAsInt(GATKSVVCFConstants.END2_ATTRIBUTE, 0); - } else if (type == GATKSVVCFConstants.StructuralVariantAnnotationType.CPX) { - // If CHR2/END2 are defined, use them - if (hasContig2 && hasEnd2) { - contigB = variant.getAttributeAsString(GATKSVVCFConstants.CONTIG2_ATTRIBUTE, null); - positionB = variant.getAttributeAsInt(GATKSVVCFConstants.END2_ATTRIBUTE, 0); - } else { - // Otherwise treat like any other variant - contigB = contigA; - positionB = variant.getEnd(); - } } else { contigB = contigA; // Force reset of END coordinate @@ -394,8 +374,13 @@ public static SVCallRecord create(final VariantContext variant, boolean keepVari final Double log10PError = variant.hasLog10PError() ? variant.getLog10PError() : null; final Map sanitizedAttributes = sanitizeAttributes(attributes); - return new SVCallRecord(id, contigA, positionA, strand1, contigB, positionB, strand2, type, cpxSubtype, length, algorithms, - variant.getAlleles(), variant.getGenotypes(), sanitizedAttributes, variant.getFilters(), log10PError); + return new SVCallRecord(id, contigA, positionA, strand1, contigB, positionB, strand2, type, cpxSubtype, + cpxIntervals, length, algorithms, variant.getAlleles(), variant.getGenotypes(), sanitizedAttributes, + variant.getFilters(), log10PError); + } + + private static List parseComplexIntervals(final List intervals, final SAMSequenceDictionary dictionary) { + return intervals.stream().map(i -> SVCallRecord.ComplexEventInterval.decode(i, dictionary)).toList(); } private static Map sanitizeAttributes(final Map attributes) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapser.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapser.java index 12d93a3baa2..39228617d26 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapser.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapser.java @@ -192,8 +192,8 @@ public SVCallRecord collapse(final SVClusterEngine.OutputCluster cluster) { final Double quality = collapseQuality(items); return new SVCallRecord(representative.getId(), representative.getContigA(), start, strandA, representative.getContigB(), - end, strandB, type, representative.getComplexSubtype(), length, algorithms, alleles, genotypes, attributes, - filters, quality, dictionary); + end, strandB, type, representative.getComplexSubtype(), representative.getComplexEventIntervals(), + length, algorithms, alleles, genotypes, attributes, filters, quality, dictionary); } protected List collapseAlleles(final List altAlleles, final Allele refAllele) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVLinkage.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVLinkage.java index e800e0283d9..854f4120b51 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVLinkage.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVLinkage.java @@ -9,6 +9,9 @@ import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; +import java.util.Iterator; +import java.util.List; + /** *

Main class for SV clustering. Two items are clustered together if they:

*
    @@ -154,6 +157,13 @@ private static boolean clusterTogetherWithParams(final SVCallRecord a, final SVC return false; } + // If complex, test complex intervals + if (a.getType() == GATKSVVCFConstants.StructuralVariantAnnotationType.CPX + && b.getType() == GATKSVVCFConstants.StructuralVariantAnnotationType.CPX && + !testComplexIntervals(a, b, params.getReciprocalOverlap(), params.getSizeSimilarity(), params.getWindow(), dictionary)) { + return false; + } + // Reciprocal overlap and size similarity // Check bypassed if both are inter-chromosomal final Boolean hasReciprocalOverlapAndSizeSimilarity; @@ -182,6 +192,40 @@ private static boolean clusterTogetherWithParams(final SVCallRecord a, final SVC } } + /** + * Performs overlap testing on each pair of complex intervals in two records, requiring each pair to be + * sufficiently similar by reciprocal overlap, size similarity, and breakend proximity. + */ + private static boolean testComplexIntervals(final SVCallRecord a, final SVCallRecord b, final double overlapThreshold, + final double sizeSimilarityThreshold, final int window, + final SAMSequenceDictionary dictionary) { + final List intervalsA = a.getComplexEventIntervals(); + final List intervalsB = b.getComplexEventIntervals(); + if (intervalsA.size() != intervalsB.size()) { + return false; + } + final Iterator iterA = intervalsA.iterator(); + final Iterator iterB = intervalsB.iterator(); + for (int i = 0; i < intervalsA.size(); i++) { + final SVCallRecord.ComplexEventInterval cpxIntervalA = iterA.next(); + final SVCallRecord.ComplexEventInterval cpxIintervalB = iterB.next(); + if (cpxIntervalA.getIntervalSVType() != cpxIintervalB.getIntervalSVType()) { + return false; + } + final SimpleInterval intervalA = cpxIntervalA.getInterval(); + final SimpleInterval intervalB = cpxIintervalB.getInterval(); + if (!(IntervalUtils.isReciprocalOverlap(intervalA, intervalB, overlapThreshold) + && testSizeSimilarity(intervalA.getLengthOnReference(), intervalB.getLengthOnReference(), sizeSimilarityThreshold) + && testBreakendProximity(new SimpleInterval(intervalA.getContig(), intervalA.getStart(), intervalA.getStart()), + new SimpleInterval(intervalA.getContig(), intervalA.getEnd(), intervalA.getEnd()), + new SimpleInterval(intervalB.getContig(), intervalB.getStart(), intervalB.getStart()), + new SimpleInterval(intervalB.getContig(), intervalB.getEnd(), intervalB.getEnd()), window, dictionary))) { + return false; + } + } + return true; + } + private static boolean testReciprocalOverlap(final SVCallRecord a, final SVCallRecord b, final double threshold) { final SimpleInterval intervalA = new SimpleInterval(a.getContigA(), a.getPositionA(), a.getPositionA() + getLength(a, INSERTION_ASSUMED_LENGTH_FOR_OVERLAP) - 1); final SimpleInterval intervalB = new SimpleInterval(b.getContigA(), b.getPositionA(), b.getPositionA() + getLength(b, INSERTION_ASSUMED_LENGTH_FOR_OVERLAP) - 1); @@ -189,28 +233,36 @@ private static boolean testReciprocalOverlap(final SVCallRecord a, final SVCallR } private static boolean testSizeSimilarity(final SVCallRecord a, final SVCallRecord b, final double threshold) { - final int sizeSimilarityLengthA = getLength(a, INSERTION_ASSUMED_LENGTH_FOR_SIZE_SIMILARITY); - final int sizeSimilarityLengthB = getLength(b, INSERTION_ASSUMED_LENGTH_FOR_SIZE_SIMILARITY); - return Math.min(sizeSimilarityLengthA, sizeSimilarityLengthB) / (double) Math.max(sizeSimilarityLengthA, sizeSimilarityLengthB) >= threshold; + return testSizeSimilarity(getLength(a, INSERTION_ASSUMED_LENGTH_FOR_SIZE_SIMILARITY), + getLength(b, INSERTION_ASSUMED_LENGTH_FOR_SIZE_SIMILARITY), threshold); + } + + private static boolean testSizeSimilarity(final int lengthA, final int lengthB, final double threshold) { + return Math.min(lengthA, lengthB) / (double) Math.max(lengthA, lengthB) >= threshold; } private static boolean testBreakendProximity(final SVCallRecord a, final SVCallRecord b, final int window, final SAMSequenceDictionary dictionary) { - final SimpleInterval intervalA1 = a.getPositionAInterval().expandWithinContig(window, dictionary); - final SimpleInterval intervalA2 = a.getPositionBInterval().expandWithinContig(window, dictionary); - if (intervalA1 == null) { - logger.warn("Invalid start position " + a.getPositionA() + " in record " + a.getId() + + return testBreakendProximity(a.getPositionAInterval(), a.getPositionBInterval(), + b.getPositionAInterval(), b.getPositionBInterval(), window, dictionary); + } + + private static boolean testBreakendProximity(final SimpleInterval intervalA1, final SimpleInterval intervalA2, + final SimpleInterval intervalB1, final SimpleInterval intervalB2, + final int window, final SAMSequenceDictionary dictionary) { + final SimpleInterval intervalA1Padded = intervalA1.expandWithinContig(window, dictionary); + final SimpleInterval intervalA2Padded = intervalA2.expandWithinContig(window, dictionary); + if (intervalA1Padded == null) { + logger.warn("Invalid start position " + intervalA1.getContig() + ":" + intervalA1.getStart() + " - record will not be matched"); return false; } - if (intervalA2 == null) { - logger.warn("Invalid end position " + a.getPositionB() + " in record " + a.getId() + + if (intervalA2Padded == null) { + logger.warn("Invalid end position " + intervalA2.getContig() + ":" + intervalA2.getStart() + " - record will not be matched"); return false; } - final SimpleInterval intervalB1 = b.getPositionAInterval(); - final SimpleInterval intervalB2 = b.getPositionBInterval(); - return intervalA1.overlaps(intervalB1) && intervalA2.overlaps(intervalB2); + return intervalA1Padded.overlaps(intervalB1) && intervalA2Padded.overlaps(intervalB2); } /** @@ -218,7 +270,8 @@ private static boolean testBreakendProximity(final SVCallRecord a, final SVCallR */ private static int getLength(final SVCallRecord record, final int missingInsertionLength) { Utils.validate(record.isIntrachromosomal(), "Record must be intra-chromosomal"); - if (record.getType() == GATKSVVCFConstants.StructuralVariantAnnotationType.INS) { + if (record.getType() == GATKSVVCFConstants.StructuralVariantAnnotationType.INS || + record.getType() == GATKSVVCFConstants.StructuralVariantAnnotationType.CPX) { return record.getLength() == null ? missingInsertionLength : Math.max(record.getLength(), 1); } else if (record.getType() == GATKSVVCFConstants.StructuralVariantAnnotationType.BND) { return record.getPositionB() - record.getPositionA() + 1; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/JointGermlineCNVSegmentation.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/JointGermlineCNVSegmentation.java index 81138035bb1..e447d2c1086 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/JointGermlineCNVSegmentation.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/JointGermlineCNVSegmentation.java @@ -646,11 +646,11 @@ public void closeTool(){ * @param minQuality drop events with quality lower than this * @return a new record or null */ - public static SVCallRecord createDepthOnlyFromGCNVWithOriginalGenotypes(final VariantContext variant, - final double minQuality, - final Set allosomalContigs, - final int refAutosomalCopyNumber, - final SampleDB sampleDB) { + public SVCallRecord createDepthOnlyFromGCNVWithOriginalGenotypes(final VariantContext variant, + final double minQuality, + final Set allosomalContigs, + final int refAutosomalCopyNumber, + final SampleDB sampleDB) { Utils.nonNull(variant); if (variant.getGenotypes().size() == 1) { //only cluster good variants @@ -672,7 +672,7 @@ public static SVCallRecord createDepthOnlyFromGCNVWithOriginalGenotypes(final Va .collect(Collectors.toList()); svBuilder.genotypes(genotypesWithECN); - final SVCallRecord baseRecord = SVCallRecordUtils.create(svBuilder.make(), true); + final SVCallRecord baseRecord = SVCallRecordUtils.create(svBuilder.make(), true, dictionary); final List nonRefGenotypes = baseRecord.getGenotypes().stream() .filter(g -> !(g.isHomRef() || (g.isNoCall() && !g.hasExtendedAttribute(GATKSVVCFConstants.COPY_NUMBER_FORMAT)))) .collect(Collectors.toList()); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVAnnotateEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVAnnotateEngine.java index 56b25ff8a5e..d49a351c39f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVAnnotateEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVAnnotateEngine.java @@ -13,7 +13,6 @@ import org.broadinstitute.hellbender.utils.SVInterval; import org.broadinstitute.hellbender.utils.SVIntervalTree; import org.broadinstitute.hellbender.utils.SimpleInterval; -import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.codecs.gtf.GencodeGtfFeature; import org.broadinstitute.hellbender.utils.codecs.gtf.GencodeGtfTranscriptFeature; import org.broadinstitute.hellbender.utils.variant.GATKSVVariantContextUtils; @@ -58,36 +57,6 @@ public class SVAnnotateEngine { GATKSVVCFConstants.ComplexVariantSubtype.delINVdup, GATKSVVCFConstants.ComplexVariantSubtype.dDUP_iDEL); - // Mini class to package SV type and interval into one object - @VisibleForTesting - protected static final class SVSegment { - private final GATKSVVCFConstants.StructuralVariantAnnotationType intervalSVType; - private final SimpleInterval interval; - protected SVSegment(final GATKSVVCFConstants.StructuralVariantAnnotationType svType, final SimpleInterval interval) { - this.intervalSVType = svType; - this.interval = interval; - } - public GATKSVVCFConstants.StructuralVariantAnnotationType getIntervalSVType() { - return intervalSVType; - } - public SimpleInterval getInterval() { - return interval; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - final SVSegment svSegment = (SVSegment) o; - return intervalSVType == svSegment.intervalSVType && interval.equals(svSegment.interval); - } - - @Override - public int hashCode() { - return Objects.hash(intervalSVType, interval); - } - } - // Container class for all SVIntervalTree trees created from the GTF @VisibleForTesting public static final class GTFIntervalTreesContainer { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVCluster.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVCluster.java index 4e483652d3c..791befb79fb 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVCluster.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVCluster.java @@ -378,7 +378,7 @@ public void closeTool() { @Override public void apply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) { - final SVCallRecord call = SVCallRecordUtils.create(variant); + final SVCallRecord call = SVCallRecordUtils.create(variant, dictionary); final SVCallRecord filteredCall; if (fastMode && call.getType() != GATKSVVCFConstants.StructuralVariantAnnotationType.CNV) { // Strip out non-carrier genotypes to save memory and compute @@ -447,9 +447,9 @@ public VariantContext buildVariantContext(final SVCallRecord call) { // Build new variant final SVCallRecord finalCall = new SVCallRecord(newId, call.getContigA(), call.getPositionA(), call.getStrandA(), - call.getContigB(), call.getPositionB(), call.getStrandB(), call.getType(), call.getComplexSubtype(), call.getLength(), - call.getAlgorithms(), call.getAlleles(), filledGenotypes, call.getAttributes(), call.getFilters(), - call.getLog10PError(), dictionary); + call.getContigB(), call.getPositionB(), call.getStrandB(), call.getType(), call.getComplexSubtype(), + call.getComplexEventIntervals(), call.getLength(), call.getAlgorithms(), call.getAlleles(), filledGenotypes, + call.getAttributes(), call.getFilters(), call.getLog10PError(), dictionary); final VariantContextBuilder builder = SVCallRecordUtils.getVariantBuilder(finalCall); if (omitMembers) { builder.rmAttribute(GATKSVVCFConstants.CLUSTER_MEMBER_IDS_KEY); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance.java index 2272a84fc11..56c389539c6 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance.java @@ -180,7 +180,7 @@ public void apply(final TruthVersusEval truthVersusEval, final ReadsContext read } private void add(final VariantContext variant, final boolean isTruth) { - SVCallRecord record = SVCallRecordUtils.create(variant); + SVCallRecord record = SVCallRecordUtils.create(variant, dictionary); if (!record.getContigA().equals(currentContig)) { flushClusters(true); currentContig = record.getContigA(); @@ -199,8 +199,8 @@ protected SVCallRecord minimizeTruthFootprint(final SVCallRecord item) { final List genotypes = item.getGenotypes().stream().map(SVConcordance::stripTruthGenotype).collect(Collectors.toList()); return new SVCallRecord(item.getId(), item.getContigA(), item.getPositionA(), item.getStrandA(), item.getContigB(), item.getPositionB(), item.getStrandB(), item.getType(), - item.getComplexSubtype(), item.getLength(), item.getAlgorithms(), item.getAlleles(), genotypes, - item.getAttributes(), item.getFilters(), item.getLog10PError(), dictionary); + item.getComplexSubtype(), item.getComplexEventIntervals(), item.getLength(), item.getAlgorithms(), + item.getAlleles(), genotypes, item.getAttributes(), item.getFilters(), item.getLog10PError(), dictionary); } private static Genotype stripTruthGenotype(final Genotype genotype) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVSegment.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVSegment.java new file mode 100644 index 00000000000..29f4d5464d0 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVSegment.java @@ -0,0 +1,56 @@ +package org.broadinstitute.hellbender.tools.walkers.sv; + +import htsjdk.samtools.util.Locatable; +import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants; +import org.broadinstitute.hellbender.utils.SimpleInterval; +import org.broadinstitute.hellbender.utils.Utils; + +import java.util.Objects; + +// Mini class to package SV type and interval into one object +public class SVSegment implements Locatable { + protected final GATKSVVCFConstants.StructuralVariantAnnotationType intervalSVType; + protected final SimpleInterval interval; + + public SVSegment(final GATKSVVCFConstants.StructuralVariantAnnotationType svType, final SimpleInterval interval) { + Utils.nonNull(interval); + this.intervalSVType = svType; + this.interval = interval; + } + + public GATKSVVCFConstants.StructuralVariantAnnotationType getIntervalSVType() { + return intervalSVType; + } + + @Override + public String getContig() { + return interval.getContig(); + } + + @Override + public int getStart() { + return interval.getStart(); + } + + @Override + public int getEnd() { + return interval.getEnd(); + } + + public SimpleInterval getInterval() { + return interval; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + final SVSegment svSegment = (SVSegment) o; + return intervalSVType == svSegment.intervalSVType && interval.equals(svSegment.interval); + } + + @Override + public int hashCode() { + return Objects.hash(intervalSVType, interval); + } +} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUnitTest.java index 170df7984f6..35864d62f1a 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUnitTest.java @@ -5,6 +5,7 @@ import htsjdk.variant.variantcontext.GenotypeBuilder; import htsjdk.variant.variantcontext.GenotypesContext; import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants; +import org.broadinstitute.hellbender.utils.SimpleInterval; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -75,7 +76,7 @@ public Object[][] testCreateInvalidCoordinatesData() { @Test(dataProvider="testCreateInvalidCoordinatesData", expectedExceptions = { IllegalArgumentException.class }) public void testCreateInvalidCoordinates(final String contigA, final int posA, final String contigB, final int posB) { new SVCallRecord("var1", contigA, posA, true, contigB, posB, false, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, - null, null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), Collections.emptyList(), + null, Collections.emptyList(), null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); Assert.fail("Expected exception not thrown"); } @@ -92,13 +93,14 @@ public Object[][] testCreateValidCoordinatesData() { @Test(dataProvider="testCreateValidCoordinatesData") public void testCreateValidCoordinates(final String contigA, final int posA, final String contigB, final int posB) { new SVCallRecord("var1", contigA, posA, true, contigB, posB, false, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, - null, null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), Collections.emptyList(), + null, Collections.emptyList(), null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); } + @Test public void testGetters() { final SVCallRecord record = new SVCallRecord("var1", "chr1", 100, true, "chr1", 200, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, - GATKSVVCFConstants.ComplexVariantSubtype.dDUP, null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), + GATKSVVCFConstants.ComplexVariantSubtype.dDUP, Lists.newArrayList(SVCallRecord.ComplexEventInterval.decode("DUP_chr1:100-200", SVTestUtils.hg38Dict)), null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), GenotypesContext.create(GenotypeBuilder.create("sample1", Lists.newArrayList(Allele.SV_SIMPLE_DEL, Allele.SV_SIMPLE_DEL))), Collections.singletonMap("TEST_KEY", "TEST_VALUE"), Collections.singleton("TEST_FILTER"), Double.valueOf(30), SVTestUtils.hg38Dict); Assert.assertEquals(record.getId(), "var1"); @@ -107,12 +109,56 @@ public void testGetters() { Assert.assertEquals(record.getStrandA(), Boolean.TRUE); Assert.assertEquals(record.getContigB(), "chr1"); Assert.assertEquals(record.getPositionB(), 200); - Assert.assertEquals(record.getStrandA(), Boolean.FALSE); + Assert.assertEquals(record.getStrandB(), Boolean.FALSE); Assert.assertEquals(record.getAlgorithms(), SVTestUtils.PESR_ONLY_ALGORITHM_LIST); Assert.assertEquals(record.getGenotypes().get("sample1").getAlleles(), Lists.newArrayList(Allele.SV_SIMPLE_DEL, Allele.SV_SIMPLE_DEL)); Assert.assertEquals(record.getAttributes(), Collections.singletonMap("TEST_KEY", "TEST_VALUE")); Assert.assertEquals(record.getAlleles(), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL)); Assert.assertEquals(record.getFilters(), Collections.singleton("TEST_FILTER")); Assert.assertEquals(record.getLog10PError(), Double.valueOf(30)); + Assert.assertEquals(record.getComplexSubtype(), GATKSVVCFConstants.ComplexVariantSubtype.dDUP); + Assert.assertEquals(record.getComplexEventIntervals(), Lists.newArrayList(SVCallRecord.ComplexEventInterval.decode("DUP_chr1:100-200", SVTestUtils.hg38Dict))); + } + + @Test + public void testComplexEventInterval() { + final SVCallRecord.ComplexEventInterval cpx1 = SVCallRecord.ComplexEventInterval.decode("DEL_chr1:100-200", SVTestUtils.hg38Dict); + Assert.assertEquals(cpx1.getIntervalSVType(), GATKSVVCFConstants.StructuralVariantAnnotationType.DEL); + Assert.assertEquals(cpx1.getInterval(), new SimpleInterval("chr1", 100, 200)); + Assert.assertEquals(cpx1.getContig(), "chr1"); + Assert.assertEquals(cpx1.getStart(), 100); + Assert.assertEquals(cpx1.getEnd(), 200); + Assert.assertEquals(cpx1.encode(), "DEL_chr1:100-200"); + } + + @DataProvider(name = "testComplexEventIntervalEqualsData") + public Object[][] testComplexEventIntervalEqualsData() { + return new Object[][]{ + {SVCallRecord.ComplexEventInterval.decode("DEL_chr1:100-200", SVTestUtils.hg38Dict), true}, + {new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, new SimpleInterval("chr1", 100, 200)), true}, + {new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.DUP, new SimpleInterval("chr1", 100, 200)), false}, + {new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, new SimpleInterval("chr2", 100, 200)), false}, + {new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, new SimpleInterval("chr1", 101, 200)), false}, + {new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, new SimpleInterval("chr1", 100, 201)), false} + }; + } + @Test(dataProvider="testComplexEventIntervalEqualsData") + public void testComplexEventIntervalEquals(final SVCallRecord.ComplexEventInterval cpx2, final boolean expected) { + final SVCallRecord.ComplexEventInterval cpx1 = SVCallRecord.ComplexEventInterval.decode("DEL_chr1:100-200", SVTestUtils.hg38Dict); + Assert.assertEquals(cpx1.equals(cpx2), expected); + if (expected) { + Assert.assertEquals(cpx1.hashCode(), cpx2.hashCode()); + } + } + + @Test(expectedExceptions = {IllegalArgumentException.class}) + public void testComplexEventIntervalBadContig1() { + // Inputs should be just 1 interval + SVCallRecord.ComplexEventInterval.decode("DEL_chr1:100-200,DEL_chr1:300-400", SVTestUtils.hg38Dict); + } + + @Test(expectedExceptions = {IllegalArgumentException.class}) + public void testComplexEventIntervalBadContig2() { + SVCallRecord.ComplexEventInterval.decode("DEL_chrDOESNOTEXIST:100-200", SVTestUtils.hg38Dict); } } \ No newline at end of file diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtilsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtilsUnitTest.java index 183d72eec27..2c814a45576 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtilsUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtilsUnitTest.java @@ -22,8 +22,8 @@ public class SVCallRecordUtilsUnitTest { private static final List ALLELES_DEL = Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL); private static final List ALLELES_INS = Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS); private static final List ALLELES_BND = Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.BND_ALLELE); - private static final List ALLELES_CPX = Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.CPX_ALLELE); private static final List ALLELES_CTX = Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.CTX_ALLELE); + private static final List ALLELES_CPX = Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.CPX_ALLELE); private static final Map TEST_ATTRIBUTES = Collections.singletonMap("TEST_KEY", "TEST_VAL"); private static final Map TEST_ATTRIBUTES_CPX = Lists.newArrayList( @@ -57,10 +57,10 @@ public class SVCallRecordUtilsUnitTest { .alleles(Lists.newArrayList(Allele.SV_SIMPLE_INS, Allele.SV_SIMPLE_INS)).make(); private static final Genotype GENOTYPE_BND_1 = new GenotypeBuilder("sample1") .alleles(Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.BND_ALLELE)).make(); - private static final Genotype GENOTYPE_CPX_1 = new GenotypeBuilder("sample1") - .alleles(Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.CPX_ALLELE)).make(); private static final Genotype GENOTYPE_CTX_1 = new GenotypeBuilder("sample1") .alleles(Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.CTX_ALLELE)).make(); + private static final Genotype GENOTYPE_CPX_1 = new GenotypeBuilder("sample1") + .alleles(Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.CPX_ALLELE)).make(); private static final Comparator RECORD_COMPARATOR = SVCallRecordUtils.getCallComparator(SVTestUtils.hg38Dict); @@ -69,7 +69,7 @@ public Object[][] testGetVariantBuilderData() { return new Object[][]{ // DEL { - new SVCallRecord("var1", "chr1", 1000, true, "chr1", 1999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, 1000, + new SVCallRecord("var1", "chr1", 1000, true, "chr1", 1999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), 1000, SVTestUtils.DEPTH_ONLY_ALGORITHM_LIST, ALLELES_DEL, Lists.newArrayList(GENOTYPE_DEL_1, GENOTYPE_DEL_2), Collections.emptyMap(), Collections.singleton("TEST_FILTER"), Double.valueOf(-3)), @@ -86,7 +86,7 @@ public Object[][] testGetVariantBuilderData() { }, // DEL w/ null ref allele { - new SVCallRecord("var1", "chr1", 1000, true, "chr1", 1999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, 1000, + new SVCallRecord("var1", "chr1", 1000, true, "chr1", 1999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), 1000, SVTestUtils.DEPTH_ONLY_ALGORITHM_LIST, Collections.singletonList(Allele.SV_SIMPLE_DEL), Collections.singletonList(GENOTYPE_DEL_3), @@ -102,7 +102,7 @@ public Object[][] testGetVariantBuilderData() { }, // INS { - new SVCallRecord("var2", "chr1", 1000, true, "chr1", 1000, false, GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, 500, + new SVCallRecord("var2", "chr1", 1000, true, "chr1", 1000, false, GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, Collections.emptyList(), 500, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_INS, Lists.newArrayList(GENOTYPE_INS_1), @@ -119,7 +119,7 @@ public Object[][] testGetVariantBuilderData() { }, // INS, flipped strands { - new SVCallRecord("var2", "chr1", 1000, false, "chr1", 1000, true, GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, 500, + new SVCallRecord("var2", "chr1", 1000, false, "chr1", 1000, true, GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, Collections.emptyList(), 500, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_INS, Lists.newArrayList(GENOTYPE_INS_1), @@ -136,7 +136,7 @@ public Object[][] testGetVariantBuilderData() { }, // INS, undefined length { - new SVCallRecord("var2", "chr1", 1000, true, "chr1", 1000, false, GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, null, + new SVCallRecord("var2", "chr1", 1000, true, "chr1", 1000, false, GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, Collections.emptyList(), null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_INS, Lists.newArrayList(GENOTYPE_INS_1), @@ -153,13 +153,13 @@ public Object[][] testGetVariantBuilderData() { }, // BND { - new SVCallRecord("var3", "chr1", 1000, false, "chr2", 1999, true, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, null, + new SVCallRecord("var_bnd", "chr1", 1000, false, "chr2", 1999, true, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, Collections.emptyList(), null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_BND, Lists.newArrayList(GENOTYPE_BND_1), Collections.emptyMap(), Collections.emptySet(), null), new VariantContextBuilder("", "chr1", 1000, 1000, ALLELES_BND) - .id("var3") + .id("var_bnd") .genotypes(GENOTYPE_BND_1) .attribute(VCFConstants.END_KEY, 1000) .attribute(GATKSVVCFConstants.ALGORITHMS_ATTRIBUTE, SVTestUtils.PESR_ONLY_ALGORITHM_LIST) @@ -170,6 +170,48 @@ public Object[][] testGetVariantBuilderData() { .make(), Collections.emptyList() }, + // CTX + { + new SVCallRecord("var_ctx", "chr1", 1000, false, "chr2", 1999, true, GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, null, Collections.emptyList(), null, + SVTestUtils.PESR_ONLY_ALGORITHM_LIST, + ALLELES_CTX, + Lists.newArrayList(GENOTYPE_CTX_1), + Collections.emptyMap(), Collections.emptySet(), null), + new VariantContextBuilder("", "chr1", 1000, 1000, ALLELES_CTX) + .id("var_ctx") + .genotypes(GENOTYPE_CTX_1) + .attribute(VCFConstants.END_KEY, 1000) + .attribute(GATKSVVCFConstants.ALGORITHMS_ATTRIBUTE, SVTestUtils.PESR_ONLY_ALGORITHM_LIST) + .attribute(GATKSVVCFConstants.STRANDS_ATTRIBUTE, "-+") + .attribute(GATKSVVCFConstants.SVTYPE, GATKSVVCFConstants.StructuralVariantAnnotationType.CTX) + .attribute(GATKSVVCFConstants.CONTIG2_ATTRIBUTE, "chr2") + .attribute(GATKSVVCFConstants.END2_ATTRIBUTE, 1999) + .make(), + Collections.emptyList() + }, + // CPX + { + new SVCallRecord("var_cpx", "chr1", 1000, null, "chr1", 1000, null, + GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, + GATKSVVCFConstants.ComplexVariantSubtype.dDUP_iDEL, + Lists.newArrayList(SVCallRecord.ComplexEventInterval.decode("DUP_chr1:5000-5100", SVTestUtils.hg38Dict), SVCallRecord.ComplexEventInterval.decode("DEL_chr2:100-200", SVTestUtils.hg38Dict)), + 100, + SVTestUtils.PESR_ONLY_ALGORITHM_LIST, + ALLELES_CPX, + Lists.newArrayList(GENOTYPE_CPX_1), + Collections.emptyMap(), Collections.emptySet(), null), + new VariantContextBuilder("", "chr1", 1000, 1000, ALLELES_CPX) + .id("var_cpx") + .genotypes(GENOTYPE_CPX_1) + .attribute(VCFConstants.END_KEY, 1000) + .attribute(GATKSVVCFConstants.ALGORITHMS_ATTRIBUTE, SVTestUtils.PESR_ONLY_ALGORITHM_LIST) + .attribute(GATKSVVCFConstants.SVTYPE, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX) + .attribute(GATKSVVCFConstants.CPX_TYPE, GATKSVVCFConstants.ComplexVariantSubtype.dDUP_iDEL.name()) + .attribute(GATKSVVCFConstants.SVLEN, 100) + .attribute(GATKSVVCFConstants.CPX_INTERVALS, "DEL_chr2:100-200,DUP_chr1:5000-5100") + .make(), + Collections.emptyList() + }, }; } @@ -181,7 +223,7 @@ public void testGetVariantBuilder(final SVCallRecord record, final VariantContex @Test public void testGetVariantBuilderHasSanitizedNullAttributes() { - final SVCallRecord record = new SVCallRecord("var3", "chr1", 1000, false, "chr2", 1999, true, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, null, + final SVCallRecord record = new SVCallRecord("var3", "chr1", 1000, false, "chr2", 1999, true, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, Collections.emptyList(), null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_BND, Lists.newArrayList(GENOTYPE_BND_1), @@ -258,14 +300,14 @@ public void testFillMissingSamplesWithGenotypes() { @Test public void testCopyCallWithNewGenotypes() { - final SVCallRecord record = new SVCallRecord("var1", "chr1", 1000, true, "chr1", 1999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, 1000, + final SVCallRecord record = new SVCallRecord("var1", "chr1", 1000, true, "chr1", 1999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), 1000, SVTestUtils.DEPTH_ONLY_ALGORITHM_LIST, ALLELES_DEL, Lists.newArrayList(GENOTYPE_DEL_1, GENOTYPE_DEL_2), Collections.singletonMap(GATKSVVCFConstants.CLUSTER_MEMBER_IDS_KEY, Collections.singletonList("sample")), Collections.emptySet(), null); final GenotypesContext genotypes = GenotypesContext.copy(Collections.singletonList(GENOTYPE_DEL_3)); final SVCallRecord result = SVCallRecordUtils.copyCallWithNewGenotypes(record, genotypes); - final SVCallRecord expected = new SVCallRecord("var1", "chr1", 1000, true, "chr1", 1999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, 1000, + final SVCallRecord expected = new SVCallRecord("var1", "chr1", 1000, true, "chr1", 1999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), 1000, SVTestUtils.DEPTH_ONLY_ALGORITHM_LIST, ALLELES_DEL, genotypes, @@ -407,7 +449,7 @@ public void testConvertInversionsToBreakends() { Assert.assertNotNull(nonInversionResult.get(0)); SVTestUtils.assertEqualsExceptMembership(nonInversionResult.get(0), nonInversion); - final SVCallRecord inversion = new SVCallRecord("", "chr1", 1000, true, "chr1", 1999, true, GATKSVVCFConstants.StructuralVariantAnnotationType.INV, null, 1000, + final SVCallRecord inversion = new SVCallRecord("", "chr1", 1000, true, "chr1", 1999, true, GATKSVVCFConstants.StructuralVariantAnnotationType.INV, null, Collections.emptyList(), 1000, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), Collections.emptyList(), @@ -482,7 +524,7 @@ public Object[][] testCreateData() { GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), null, null, TEST_ATTRIBUTES, -90.), - new SVCallRecord("var1", "chr1", 1000, true, "chr1", 1999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, 1000, + new SVCallRecord("var1", "chr1", 1000, true, "chr1", 1999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), 1000, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), ALLELES_DEL, Lists.newArrayList(GENOTYPE_DEL_1, GENOTYPE_DEL_2), TEST_ATTRIBUTES, Collections.emptySet(), -90.) }, @@ -492,7 +534,7 @@ public Object[][] testCreateData() { GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), null, null, TEST_ATTRIBUTES, null), - new SVCallRecord("var2", "chr1", 1000, true, "chr1", 1999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, 1000, + new SVCallRecord("var2", "chr1", 1000, true, "chr1", 1999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), 1000, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), ALLELES_DEL, Lists.newArrayList(GENOTYPE_DEL_1, GENOTYPE_DEL_2), TEST_ATTRIBUTES, Collections.emptySet(), null) }, @@ -501,7 +543,7 @@ public Object[][] testCreateData() { ALLELES_INS, Lists.newArrayList(GENOTYPE_INS_1, GENOTYPE_INS_2), 500, "+-", GATKSVVCFConstants.StructuralVariantAnnotationType.INS, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, null, null, TEST_ATTRIBUTES, null), - new SVCallRecord("var3", "chr1", 1000, true, "chr1", 1000, false, GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, 500, + new SVCallRecord("var3", "chr1", 1000, true, "chr1", 1000, false, GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, Collections.emptyList(), 500, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_INS, Lists.newArrayList(GENOTYPE_INS_1, GENOTYPE_INS_2), TEST_ATTRIBUTES, Collections.emptySet(), null) }, @@ -510,7 +552,7 @@ public Object[][] testCreateData() { ALLELES_INS, Lists.newArrayList(GENOTYPE_INS_1, GENOTYPE_INS_2), 500, "-+", GATKSVVCFConstants.StructuralVariantAnnotationType.INS, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, null, null, TEST_ATTRIBUTES, null), - new SVCallRecord("var4", "chr1", 1000, false, "chr1", 1000, true, GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, 500, + new SVCallRecord("var4", "chr1", 1000, false, "chr1", 1000, true, GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, Collections.emptyList(), 500, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_INS, Lists.newArrayList(GENOTYPE_INS_1, GENOTYPE_INS_2), TEST_ATTRIBUTES, Collections.emptySet(), null) }, @@ -519,7 +561,7 @@ public Object[][] testCreateData() { ALLELES_INS, Lists.newArrayList(GENOTYPE_INS_1, GENOTYPE_INS_2), -1, "-+", GATKSVVCFConstants.StructuralVariantAnnotationType.INS, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, null, null, TEST_ATTRIBUTES, null), - new SVCallRecord("var4b", "chr1", 1000, false, "chr1", 1000, true, GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, null, + new SVCallRecord("var4b", "chr1", 1000, false, "chr1", 1000, true, GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, Collections.emptyList(), null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_INS, Lists.newArrayList(GENOTYPE_INS_1, GENOTYPE_INS_2), TEST_ATTRIBUTES, Collections.emptySet(), null) }, @@ -528,7 +570,7 @@ public Object[][] testCreateData() { ALLELES_BND, Collections.singletonList(GENOTYPE_BND_1), null, "++", GATKSVVCFConstants.StructuralVariantAnnotationType.BND, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, "chrX", 2000, TEST_ATTRIBUTES, null), - new SVCallRecord("var5", "chr1", 1000, true, "chrX", 2000, true, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, null, + new SVCallRecord("var5", "chr1", 1000, true, "chrX", 2000, true, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, Collections.emptyList(), null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_BND, Collections.singletonList(GENOTYPE_BND_1), TEST_ATTRIBUTES, Collections.emptySet(), null) }, @@ -537,7 +579,7 @@ public Object[][] testCreateData() { ALLELES_BND, Collections.singletonList(GENOTYPE_BND_1), null, "++", GATKSVVCFConstants.StructuralVariantAnnotationType.BND, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, "chrX", 2000, TEST_ATTRIBUTES, null), - new SVCallRecord("var6", "chr1", 1000, true, "chrX", 2000, true, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, null, + new SVCallRecord("var6", "chr1", 1000, true, "chrX", 2000, true, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, Collections.emptyList(), null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_BND, Collections.singletonList(GENOTYPE_BND_1), TEST_ATTRIBUTES, Collections.emptySet(), null) }, @@ -546,7 +588,7 @@ public Object[][] testCreateData() { ALLELES_CPX, Collections.singletonList(GENOTYPE_CPX_1), 250, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, "chrX", 2000, TEST_ATTRIBUTES_CPX, null), - new SVCallRecord("var7", "chr1", 1000, null, "chrX", 2000, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, GATKSVVCFConstants.ComplexVariantSubtype.dDUP, 250, + new SVCallRecord("var7", "chr1", 1000, null, "chr1", 1000, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, GATKSVVCFConstants.ComplexVariantSubtype.dDUP, Collections.emptyList(), 250, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_CPX, Collections.singletonList(GENOTYPE_CPX_1), TEST_ATTRIBUTES, Collections.emptySet(), null) }, @@ -555,7 +597,7 @@ public Object[][] testCreateData() { ALLELES_CPX, Collections.singletonList(GENOTYPE_CPX_1), 250, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, "chr1", null, TEST_ATTRIBUTES_CPX, null), - new SVCallRecord("var8", "chr1", 1000, null, "chr1", 2000, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, GATKSVVCFConstants.ComplexVariantSubtype.dDUP, 250, + new SVCallRecord("var8", "chr1", 1000, null, "chr1", 2000, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, GATKSVVCFConstants.ComplexVariantSubtype.dDUP, Collections.emptyList(), 250, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_CPX, Collections.singletonList(GENOTYPE_CPX_1), TEST_ATTRIBUTES, Collections.emptySet(), null) }, @@ -564,25 +606,43 @@ public Object[][] testCreateData() { ALLELES_CPX, Collections.singletonList(GENOTYPE_CPX_1), 250, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, null, null, TEST_ATTRIBUTES_CPX, null), - new SVCallRecord("var9", "chr1", 1000, null, "chr1", 2000, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, GATKSVVCFConstants.ComplexVariantSubtype.dDUP, 250, + new SVCallRecord("var9", "chr1", 1000, null, "chr1", 2000, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, GATKSVVCFConstants.ComplexVariantSubtype.dDUP, Collections.emptyList(), 250, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_CPX, Collections.singletonList(GENOTYPE_CPX_1), TEST_ATTRIBUTES, Collections.emptySet(), null) }, { - SVTestUtils.newVariantContext("var10", "chr1", 1000, 1000, + SVTestUtils.newVariantContext("var10", "chr1", 1000, 2000, ALLELES_CPX, Collections.singletonList(GENOTYPE_CPX_1), 250, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, - "chrX", 2000, TEST_ATTRIBUTES, null), - new SVCallRecord("var10", "chr1", 1000, null, "chrX", 2000, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, null, 250, + "chrX", 2000, + Map.of("TEST_KEY", "TEST_VAL", GATKSVVCFConstants.CPX_TYPE, GATKSVVCFConstants.ComplexVariantSubtype.dDUP_iDEL.name(), GATKSVVCFConstants.CPX_INTERVALS, Arrays.asList("DUP_chr1:100-200", "DEL_chr2:300-400")), + null), + new SVCallRecord("var10", "chr1", 1000, null, "chr1", 2000, null, + GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, + GATKSVVCFConstants.ComplexVariantSubtype.dDUP_iDEL, + Lists.newArrayList(SVCallRecord.ComplexEventInterval.decode("DUP_chr1:100-200", SVTestUtils.hg38Dict), SVCallRecord.ComplexEventInterval.decode("DEL_chr2:300-400", SVTestUtils.hg38Dict)), + 250, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_CPX, Collections.singletonList(GENOTYPE_CPX_1), TEST_ATTRIBUTES, Collections.emptySet(), null) }, { SVTestUtils.newVariantContext("var11", "chr1", 1000, 1000, - ALLELES_CTX, Collections.singletonList(GENOTYPE_CTX_1), -1, null, + ALLELES_CTX, Collections.singletonList(GENOTYPE_CTX_1), null, "++", GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, "chrX", 2000, TEST_ATTRIBUTES_CTX, null), - new SVCallRecord("var11", "chr1", 1000, null, "chrX", 2000, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, GATKSVVCFConstants.ComplexVariantSubtype.CTX_PP_QQ, null, + new SVCallRecord("var11", "chr1", 1000, true, "chrX", 2000, true, GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, GATKSVVCFConstants.ComplexVariantSubtype.CTX_PP_QQ, Collections.emptyList(), null, + SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_CTX, Collections.singletonList(GENOTYPE_CTX_1), + TEST_ATTRIBUTES, Collections.emptySet(), null) + }, + // CPX with a null value in the CPX_INTERVALS list + { + SVTestUtils.newVariantContext("var12", "chr1", 1000, 1000, + ALLELES_CTX, Collections.singletonList(GENOTYPE_CTX_1), null, "++", + GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, + "chrX", 2000, + Map.of("TEST_KEY", "TEST_VAL", GATKSVVCFConstants.CPX_TYPE, "CTX_PP/QQ", GATKSVVCFConstants.CPX_INTERVALS, Collections.emptyList()), + null), + new SVCallRecord("var12", "chr1", 1000, true, "chrX", 2000, true, GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, GATKSVVCFConstants.ComplexVariantSubtype.CTX_PP_QQ, Collections.emptyList(), null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_CTX, Collections.singletonList(GENOTYPE_CTX_1), TEST_ATTRIBUTES, Collections.emptySet(), null) }, @@ -592,11 +652,11 @@ public Object[][] testCreateData() { @Test(dataProvider= "testCreateData") public void testCreate(final VariantContext variant, final SVCallRecord expected) { final List excludedAttributes = new ArrayList<>(variant.getAttributes().keySet()); - final SVCallRecord resultDropAttr = SVCallRecordUtils.create(variant, false); + final SVCallRecord resultDropAttr = SVCallRecordUtils.create(variant, false, SVTestUtils.hg38Dict); SVTestUtils.assertEqualsExceptExcludedAttributes(resultDropAttr, expected, excludedAttributes); Assert.assertTrue(resultDropAttr.getAttributes().isEmpty()); - final SVCallRecord resultKeepAttr = SVCallRecordUtils.create(variant, true); + final SVCallRecord resultKeepAttr = SVCallRecordUtils.create(variant, true, SVTestUtils.hg38Dict); SVTestUtils.assertEqualsExceptExcludedAttributes(resultKeepAttr, expected, Collections.emptyList()); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/SVTestUtils.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/SVTestUtils.java index 7f9dbb928ce..35959b472d2 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/sv/SVTestUtils.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/SVTestUtils.java @@ -8,6 +8,7 @@ import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.engine.GATKPath; import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.SimpleSVType; import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants; import org.broadinstitute.hellbender.tools.sv.cluster.*; import org.broadinstitute.hellbender.utils.GenomeLoc; @@ -19,6 +20,8 @@ import java.util.*; import java.util.stream.Collectors; +import static org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants.CPX_SV_SYB_ALT_ALLELE_STR; + public class SVTestUtils { public static final ReferenceSequenceFile hg38Reference = ReferenceUtils.createReferenceReader(new GATKPath(GATKBaseTest.hg38Reference)); @@ -35,6 +38,7 @@ public class SVTestUtils { public static final String PESR_ALGORITHM = "pesr"; public static final List DEPTH_ONLY_ALGORITHM_LIST = Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM); public static final List PESR_ONLY_ALGORITHM_LIST = Collections.singletonList(PESR_ALGORITHM); + public static final Allele CPX_ALLELE = Allele.create(SimpleSVType.createBracketedSymbAlleleString(CPX_SV_SYB_ALT_ALLELE_STR)); public static CanonicalSVLinkage getNewDefaultLinkage() { final CanonicalSVLinkage linkage = new CanonicalSVLinkage<>(SVTestUtils.hg38Dict, false); @@ -134,7 +138,7 @@ public final static SVCallRecord makeRecord(final String id, for (final GenotypeBuilder builder : genotypeBuilders) { genotypes.add(makeGenotypeWithRefAllele(builder, refAllele)); } - return new SVCallRecord(id, contigA, positionA, strandA, contigB, positionB, strandB, type, null, length, algorithms, + return new SVCallRecord(id, contigA, positionA, strandA, contigB, positionB, strandB, type, null, Collections.emptyList(), length, algorithms, newAlleles, genotypes, Collections.emptyMap(), Collections.emptySet(), null, hg38Dict); } @@ -286,6 +290,8 @@ public static void assertEqualsExceptExcludedAttributes(final SVCallRecord one, Assert.assertEquals(one.getId(), two.getId()); Assert.assertEquals(one.getLength(), two.getLength()); Assert.assertEquals(one.getType(), two.getType()); + Assert.assertEquals(one.getComplexSubtype(), two.getComplexSubtype()); + Assert.assertEquals(one.getComplexEventIntervals(), two.getComplexEventIntervals()); Assert.assertEquals(one.getGenotypes().size(), two.getGenotypes().size()); for (int i = 0; i < one.getGenotypes().size(); i++) { final Genotype g1 = ignoreGT ? nullGT(one.getGenotypes().get(i)) : one.getGenotypes().get(i); @@ -372,7 +378,7 @@ public static SVCallRecord newCallRecordWithAllelesAndSampleName(final String sa builder = builder.attribute(GATKSVVCFConstants.COPY_NUMBER_FORMAT, copyNumber); } return new SVCallRecord("", "chr1", 100, getValidTestStrandA(svtype), "chr1", 199, getValidTestStrandB(svtype), - svtype, null, 100, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), + svtype, null, Collections.emptyList(), 100, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), variantAlleles, Collections.singletonList(builder.make()), Collections.emptyMap(), Collections.emptySet(), null); @@ -380,7 +386,7 @@ public static SVCallRecord newCallRecordWithAllelesAndSampleName(final String sa public static SVCallRecord newNamedDeletionRecordWithAttributes(final String id, final Map attributes) { return new SVCallRecord(id, "chr1", 100, true, "chr1", 199, false, - GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), 100, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Collections.emptyList(), Collections.emptyList(), @@ -391,7 +397,7 @@ public static SVCallRecord newNamedDeletionRecordWithAttributesAndGenotypes(fina final List genotypes, final Map attributes) { return new SVCallRecord(id, "chr1", 100, true, "chr1", 199, false, - GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), 100, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), genotypes, @@ -410,40 +416,40 @@ public static final Map keyValueArraysToMap(final String[] keys, public static SVCallRecord newCallRecordWithLengthAndType(final Integer length, final GATKSVVCFConstants.StructuralVariantAnnotationType svtype) { final int positionB = length == null ? 1 : CoordMath.getEnd(1, length); return new SVCallRecord("", "chr1", 1, getValidTestStrandA(svtype), "chr1", positionB, getValidTestStrandB(svtype), - svtype, null, length, PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), Collections.emptyList(), + svtype, null, Collections.emptyList(), length, PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null); } public static SVCallRecord newDeletionCallRecordWithIdAndAlgorithms(final String id, final List algorithms) { return new SVCallRecord(id, "chr1", 1, true, "chr1", 100, false, - GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, 100, algorithms, Collections.emptyList(), + GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), 100, algorithms, Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null); } // Note strands and length may not be set properly public static SVCallRecord newPESRCallRecordWithIntervalAndType(final int start, final int end, final GATKSVVCFConstants.StructuralVariantAnnotationType svtype) { return new SVCallRecord("", "chr1", start, getValidTestStrandA(svtype), "chr1", end, getValidTestStrandB(svtype), - svtype, null, getLength(start, end, svtype), PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), + svtype, null, Collections.emptyList(), getLength(start, end, svtype), PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null); } // Note strands and length may not be set properly public static SVCallRecord newInsertionWithPositionAndLength(final int start, final int length) { return new SVCallRecord("", "chr1", start, true, "chr1", start + 1, false, - GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, length, PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), + GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, Collections.emptyList(), length, PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null); } public static SVCallRecord newDepthCallRecordWithIntervalAndType(final int start, final int end, final GATKSVVCFConstants.StructuralVariantAnnotationType svtype) { return new SVCallRecord("", "chr1", start, getValidTestStrandA(svtype), "chr1", end, getValidTestStrandB(svtype), - svtype, null, getLength(start, end, svtype), DEPTH_ONLY_ALGORITHM_LIST, Collections.emptyList(), + svtype, null, Collections.emptyList(), getLength(start, end, svtype), DEPTH_ONLY_ALGORITHM_LIST, Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null); } // Note strands and length may not be set properly public static SVCallRecord newCallRecordWithContigsIntervalAndType(final String startContig, final int start, final String endContig, final int end, final GATKSVVCFConstants.StructuralVariantAnnotationType svtype) { return new SVCallRecord("", startContig, start, getValidTestStrandA(svtype), endContig, end, getValidTestStrandB(svtype), - svtype, null, getLength(start, end, svtype), PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), + svtype, null, Collections.emptyList(), getLength(start, end, svtype), PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null); } @@ -457,7 +463,7 @@ public static Integer getLength(final int start, final int end, final GATKSVVCFC } public static SVCallRecord newBndCallRecordWithStrands(final boolean strandA, final boolean strandB) { - return new SVCallRecord("", "chr1", 1000, strandA, "chr1", 1000, strandB, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, null, + return new SVCallRecord("", "chr1", 1000, strandA, "chr1", 1000, strandB, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, Collections.emptyList(), null, Collections.singletonList(PESR_ALGORITHM), Collections.emptyList(), Collections.emptyList(), @@ -465,7 +471,7 @@ public static SVCallRecord newBndCallRecordWithStrands(final boolean strandA, fi } public static SVCallRecord newCtxCallRecord() { - return new SVCallRecord("", "chr1", 1000, null, "chr1", 1000, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, null, null, + return new SVCallRecord("", "chr1", 1000, null, "chr1", 1000, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, null, Collections.emptyList(), null, Collections.singletonList(PESR_ALGORITHM), Collections.emptyList(), Collections.emptyList(), @@ -473,7 +479,7 @@ public static SVCallRecord newCtxCallRecord() { } public static SVCallRecord newCpxCallRecordWithLength(final int length) { - return new SVCallRecord("", "chr1", 1000, null, "chr1", 1000, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, null, length, + return new SVCallRecord("", "chr1", 1000, null, "chr1", 1000, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, null, Collections.emptyList(), length, Collections.singletonList(PESR_ALGORITHM), Collections.emptyList(), Collections.emptyList(), @@ -481,7 +487,7 @@ public static SVCallRecord newCpxCallRecordWithLength(final int length) { } public static SVCallRecord newCnvCallRecordWithStrands(final Boolean strandA, final Boolean strandB) { - return new SVCallRecord("", "chr1", 1000, strandA, "chr1", 1999, strandB, GATKSVVCFConstants.StructuralVariantAnnotationType.CNV, null, 1000, + return new SVCallRecord("", "chr1", 1000, strandA, "chr1", 1999, strandB, GATKSVVCFConstants.StructuralVariantAnnotationType.CNV, null, Collections.emptyList(), 1000, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Collections.emptyList(), Collections.emptyList(), @@ -489,7 +495,7 @@ public static SVCallRecord newCnvCallRecordWithStrands(final Boolean strandA, fi } public static SVCallRecord newCallRecordWithCoordinates(final String id, final String chrA, final int posA, final String chrB, final int posB) { - return new SVCallRecord(id, chrA, posA, true, chrB, posB, false, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, null, + return new SVCallRecord(id, chrA, posA, true, chrB, posB, false, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, Collections.emptyList(), null, Collections.singletonList("peser"), Collections.emptyList(), Collections.emptyList(), @@ -497,7 +503,7 @@ public static SVCallRecord newCallRecordWithCoordinates(final String id, final S } public static SVCallRecord newCallRecordWithCoordinatesAndType(final String id, final String chrA, final int posA, final String chrB, final int posB, final GATKSVVCFConstants.StructuralVariantAnnotationType type) { - return new SVCallRecord(id, chrA, posA, true, chrB, posB, false, type, null, getLength(posA, posB, type), + return new SVCallRecord(id, chrA, posA, true, chrB, posB, false, type, null, Collections.emptyList(), getLength(posA, posB, type), Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Collections.emptyList(), Collections.emptyList(), @@ -505,7 +511,7 @@ public static SVCallRecord newCallRecordWithCoordinatesAndType(final String id, } public static SVCallRecord newCallRecordWithAlgorithms(final List algorithms) { - return new SVCallRecord("", "chr1", 1000, true, "chr1", 1000, false, GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, length, + return new SVCallRecord("", "chr1", 1000, true, "chr1", 1000, false, GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, Collections.emptyList(), length, algorithms, Collections.emptyList(), Collections.emptyList(), @@ -513,7 +519,7 @@ public static SVCallRecord newCallRecordWithAlgorithms(final List algori } public static SVCallRecord newCallRecordInsertionWithLength(final Integer length) { - return new SVCallRecord("", "chr1", 1000, true, "chr1", 1000, false, GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, length, + return new SVCallRecord("", "chr1", 1000, true, "chr1", 1000, false, GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, Collections.emptyList(), length, PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), Collections.emptyList(), diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CNVDefragmenterTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CNVDefragmenterTest.java index 56ab174a353..012c302bd83 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CNVDefragmenterTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CNVDefragmenterTest.java @@ -20,47 +20,47 @@ public class CNVDefragmenterTest { @Test public void testClusterTogether() { - final SVCallRecord deletion = new SVCallRecord("test_del", "chr1", 1000, true, "chr1", 1999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + final SVCallRecord deletion = new SVCallRecord("test_del", "chr1", 1000, true, "chr1", 1999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), 1000, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, dictionary); - final SVCallRecord duplication = new SVCallRecord("test_dup", "chr1", 1000, false, "chr1", 1999, true, GATKSVVCFConstants.StructuralVariantAnnotationType.DUP, null, + final SVCallRecord duplication = new SVCallRecord("test_dup", "chr1", 1000, false, "chr1", 1999, true, GATKSVVCFConstants.StructuralVariantAnnotationType.DUP, null, Collections.emptyList(), 1000, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DUP), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, dictionary); Assert.assertFalse(defragmenter.areClusterable(deletion, duplication), "Different sv types should not cluster"); - final SVCallRecord duplicationNonDepthOnly = new SVCallRecord("test_dup", "chr1", 1000, false, "chr1", 1999, true, GATKSVVCFConstants.StructuralVariantAnnotationType.DUP, null, + final SVCallRecord duplicationNonDepthOnly = new SVCallRecord("test_dup", "chr1", 1000, false, "chr1", 1999, true, GATKSVVCFConstants.StructuralVariantAnnotationType.DUP, null, Collections.emptyList(), 1000, Lists.newArrayList(GATKSVVCFConstants.DEPTH_ALGORITHM, SVTestUtils.PESR_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DUP), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, dictionary); Assert.assertFalse(defragmenter.areClusterable(duplication, duplicationNonDepthOnly), "Clustered records must be depth-only"); - final SVCallRecord cnv = new SVCallRecord("test_cnv", "chr1", 1000, null, "chr1", 1999, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CNV, null, + final SVCallRecord cnv = new SVCallRecord("test_cnv", "chr1", 1000, null, "chr1", 1999, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CNV, null, Collections.emptyList(), 1000, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL, Allele.SV_SIMPLE_DUP), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, dictionary); Assert.assertFalse(defragmenter.areClusterable(deletion, cnv), "Different sv types should not cluster"); - final SVCallRecord insertion = new SVCallRecord("test_ins", "chr1", 1000, true, "chr1", 1001, false, GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, + final SVCallRecord insertion = new SVCallRecord("test_ins", "chr1", 1000, true, "chr1", 1001, false, GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, Collections.emptyList(), 1000, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, dictionary); Assert.assertFalse(defragmenter.areClusterable(insertion, insertion), "Only CNVs should be valid"); - final SVCallRecord deletion2 = new SVCallRecord("test_del2", "chr1", 1000, true, "chr1", 1999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + final SVCallRecord deletion2 = new SVCallRecord("test_del2", "chr1", 1000, true, "chr1", 1999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), 1000, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, dictionary); Assert.assertTrue(defragmenter.areClusterable(deletion, deletion2), "Valid identical records should cluster"); - final SVCallRecord deletion3 = new SVCallRecord("test_del3", "chr1", 2999, true, "chr1", 3998, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + final SVCallRecord deletion3 = new SVCallRecord("test_del3", "chr1", 2999, true, "chr1", 3998, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), 1000, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, dictionary); Assert.assertTrue(defragmenter.areClusterable(deletion, deletion3), "Should cluster due to overlap"); - final SVCallRecord deletion4 = new SVCallRecord("test_del3", "chr1", 3000, true, "chr1", 3999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + final SVCallRecord deletion4 = new SVCallRecord("test_del3", "chr1", 3000, true, "chr1", 3999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), 1000, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, dictionary); @@ -189,7 +189,7 @@ public Object[][] recordPairs() { @Test(dataProvider= "maxPositionIntervals") public void testGetMaxClusterableStartingPosition(final int start, final int end) { - final SVCallRecord call1 = new SVCallRecord("call1", "chr1", start, true, "chr1", end, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + final SVCallRecord call1 = new SVCallRecord("call1", "chr1", start, true, "chr1", end, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), end - start + 1, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, dictionary); @@ -197,7 +197,7 @@ public void testGetMaxClusterableStartingPosition(final int start, final int end final int call2Start = maxClusterableStart; final int call2End = dictionary.getSequence(call1.getContigA()).getSequenceLength(); - final SVCallRecord call2 = new SVCallRecord("call2", "chr1", call2Start, true, "chr1", call2End, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + final SVCallRecord call2 = new SVCallRecord("call2", "chr1", call2Start, true, "chr1", call2End, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), call2End - call2Start + 1, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, dictionary); @@ -205,7 +205,7 @@ public void testGetMaxClusterableStartingPosition(final int start, final int end final int call3Start = maxClusterableStart + 1; final int call3End = dictionary.getSequence(call1.getContigA()).getSequenceLength(); - final SVCallRecord call3 = new SVCallRecord("call3", "chr1", call3Start, true, "chr1", call3End, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + final SVCallRecord call3 = new SVCallRecord("call3", "chr1", call3Start, true, "chr1", call3End, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), call3End - call3Start + 1, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, dictionary); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapserUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapserUnitTest.java index fe34ea6f6eb..f46e4a6e7c9 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapserUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapserUnitTest.java @@ -17,10 +17,7 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -1578,4 +1575,27 @@ public void collapseAlgorithmsTest(final List> algorithmLists, fina final List records = algorithmLists.stream().map(list -> SVTestUtils.newDeletionCallRecordWithIdAndAlgorithms("", list)).collect(Collectors.toList()); Assert.assertEquals(collapser.collapseAlgorithms(records), expectedResult); } + + @Test + public void testComplexSubtypeAndIntervals() { + final SVCallRecord cpx1 = new SVCallRecord("cpx1", "chr1", 1000, null, + "chr1", 1000, null, + GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, + GATKSVVCFConstants.ComplexVariantSubtype.dDUP, + Arrays.asList(SVCallRecord.ComplexEventInterval.decode("DUP_chr1:6000-8000", SVTestUtils.hg38Dict)), + null, Collections.singletonList(SVTestUtils.PESR_ALGORITHM), + Lists.newArrayList(Allele.REF_N, SVTestUtils.CPX_ALLELE), + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); + final SVCallRecord cpx2 = new SVCallRecord("cpx1", "chr1", 1000, null, + "chr1", 1000, null, + GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, + GATKSVVCFConstants.ComplexVariantSubtype.dDUP, + Arrays.asList(SVCallRecord.ComplexEventInterval.decode("DUP_chr1:6000-8000", SVTestUtils.hg38Dict)), + null, Collections.singletonList(SVTestUtils.PESR_ALGORITHM), + Lists.newArrayList(Allele.REF_N, SVTestUtils.CPX_ALLELE), + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); + final SVCallRecord result = collapser.collapse(new SVClusterEngine.OutputCluster(Lists.newArrayList(cpx1, cpx2))); + Assert.assertEquals(result.getComplexSubtype(), GATKSVVCFConstants.ComplexVariantSubtype.dDUP); + Assert.assertEquals(result.getComplexEventIntervals(), cpx1.getComplexEventIntervals()); + } } \ No newline at end of file diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/SVClusterEngineTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/SVClusterEngineTest.java index 942feab95f6..38849f533bb 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/SVClusterEngineTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/SVClusterEngineTest.java @@ -6,6 +6,7 @@ import org.broadinstitute.hellbender.tools.sv.SVCallRecord; import org.broadinstitute.hellbender.tools.sv.SVCallRecordUtils; import org.broadinstitute.hellbender.tools.sv.SVTestUtils; +import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.variant.VariantContextGetters; import org.testng.Assert; import org.testng.TestException; @@ -31,6 +32,22 @@ public class SVClusterEngineTest { private final CanonicalSVLinkage linkageSizeSimilarity = new CanonicalSVLinkage<>(SVTestUtils.hg38Dict, false); private final SVClusterEngine engineSizeSimilarity = new SVClusterEngine(SVClusterEngine.CLUSTERING_TYPE.SINGLE_LINKAGE, SVTestUtils.defaultCollapser::collapse, linkageSizeSimilarity, SVTestUtils.hg38Dict); + // Assign length according to coordinate scheme + private static Integer inferLength(final String contigA, final int posA, final String contigB, final int posB) { + if (contigA.equals(contigB)) { + if (posA == posB) { + // no interval + return null; + } else { + // intrachromosomal and intervaled + return posB - posA; + } + } else { + // interchromosomal + return null; + } + } + @BeforeTest public void initializeClusterEngine() { engine.add(SVTestUtils.call1); @@ -148,11 +165,13 @@ public void testClusterTogetherWithSizeSimilarityInsertions(final int start1, fi @Test(expectedExceptions = { IllegalArgumentException.class }) public void testClusterTogetherInvalidInterval() { // End position beyond contig end after padding - final SVCallRecord deletion1 = new SVCallRecord("test_del", "chr1", 1000, true, "chr1", 248956423 + SVTestUtils.defaultEvidenceParameters.getWindow(), false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + final SVCallRecord deletion1 = new SVCallRecord("test_del", "chr1", 1000, true, "chr1", 248956423 + SVTestUtils.defaultEvidenceParameters.getWindow(), false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, + null, Collections.emptyList(), null, Collections.singletonList(SVTestUtils.PESR_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); - final SVCallRecord deletion2 = new SVCallRecord("test_del", "chr1", 1000, true, "chr1", 248956422, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + final SVCallRecord deletion2 = new SVCallRecord("test_del", "chr1", 1000, true, "chr1", 248956422, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, + null, Collections.emptyList(), null, Collections.singletonList(SVTestUtils.PESR_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); @@ -183,29 +202,34 @@ public void testGetMaxClusterableStartingPosition(final int start, final int end } private void testGetMaxClusterableStartingPositionWithAlgorithm(final int start, final int end, final String algorithm) { - final SVCallRecord call1 = new SVCallRecord("call1", "chr1", start, true, "chr1", end, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + final SVCallRecord call1 = new SVCallRecord("call1", "chr1", start, true, "chr1", end, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, + null, Collections.emptyList(), end - start + 1, Collections.singletonList(algorithm), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); final int maxClusterableStart = engine.getLinkage().getMaxClusterableStartingPosition(call1); final int call2Start = maxClusterableStart; - final SVCallRecord call2Depth = new SVCallRecord("call2", "chr1", call2Start, true, "chr1", call2Start + call1.getLength() - 1, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + final SVCallRecord call2Depth = new SVCallRecord("call2", "chr1", call2Start, true, "chr1", call2Start + call1.getLength() - 1, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, + null, Collections.emptyList(), call1.getLength(), Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); - final SVCallRecord call2Pesr = new SVCallRecord("call2", "chr1", call2Start, true, "chr1", call2Start + call1.getLength() - 1, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + final SVCallRecord call2Pesr = new SVCallRecord("call2", "chr1", call2Start, true, "chr1", call2Start + call1.getLength() - 1, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, + null, Collections.emptyList(), call1.getLength(), SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); Assert.assertTrue(engine.getLinkage().areClusterable(call1, call2Depth) || engine.getLinkage().areClusterable(call1, call2Pesr)); final int call3Start = maxClusterableStart + 1; - final SVCallRecord call3Depth = new SVCallRecord("call2", "chr1", call3Start, true, "chr1", call3Start + call1.getLength() - 1, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + final SVCallRecord call3Depth = new SVCallRecord("call2", "chr1", call3Start, true, "chr1", call3Start + call1.getLength() - 1, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, + null, Collections.emptyList(), call1.getLength(), Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); - final SVCallRecord call3Pesr = new SVCallRecord("call2", "chr1", call3Start, true, "chr1", call3Start + call1.getLength() - 1, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + final SVCallRecord call3Pesr = new SVCallRecord("call2", "chr1", call3Start, true, "chr1", call3Start + call1.getLength() - 1, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, + null, Collections.emptyList(), call1.getLength(), SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); @@ -262,12 +286,12 @@ public Object[][] clusterTogetherVaryPositionsProvider() { public void testClusterTogetherVaryPositions(final int start1, final int end1, final int start2, final int end2, final boolean result) { final SVCallRecord call1 = new SVCallRecord("call1", "chr1", start1, true, "chr1", end1, false, - GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, end1 - start1 + 1, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, + GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), end1 - start1 + 1, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL, Allele.SV_SIMPLE_DUP), SVTestUtils.threeGenotypes, Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); final SVCallRecord call2 = new SVCallRecord("call2", "chr1", start2, true, "chr1", end2, false, - GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, end2 - start2 + 1, Lists.newArrayList(GATKSVVCFConstants.DEPTH_ALGORITHM), + GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), end2 - start2 + 1, Lists.newArrayList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL, Allele.SV_SIMPLE_DUP), SVTestUtils.threeGenotypes, Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); Assert.assertEquals(engine.getLinkage().areClusterable(call1, call2), result); @@ -278,12 +302,12 @@ public void testClusterTogetherVaryTypes() { for (final GATKSVVCFConstants.StructuralVariantAnnotationType type1 : GATKSVVCFConstants.StructuralVariantAnnotationType.values()) { // Pass in null strands to let them be determined automatically final SVCallRecord call1 = new SVCallRecord("call1", "chr1", 1000, SVTestUtils.getValidTestStrandA(type1), - "chr1", 2001, SVTestUtils.getValidTestStrandB(type1), type1, null, + "chr1", 2001, SVTestUtils.getValidTestStrandB(type1), type1, null, Collections.emptyList(), SVTestUtils.getLength(1000, 2001, type1), Lists.newArrayList(GATKSVVCFConstants.DEPTH_ALGORITHM), Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); for (final GATKSVVCFConstants.StructuralVariantAnnotationType type2 : GATKSVVCFConstants.StructuralVariantAnnotationType.values()) { final SVCallRecord call2 = new SVCallRecord("call2", "chr1", 1000, SVTestUtils.getValidTestStrandA(type2), - "chr1", 2001, SVTestUtils.getValidTestStrandB(type2), type2, null, + "chr1", 2001, SVTestUtils.getValidTestStrandB(type2), type2, null, Collections.emptyList(), SVTestUtils.getLength(1000, 2001, type2), Lists.newArrayList(GATKSVVCFConstants.DEPTH_ALGORITHM), Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); // Should only cluster together if same type, except CNVs @@ -303,13 +327,13 @@ public void testClusterTogetherVaryStrands() { for (final Boolean strand1A : bools) { for (final Boolean strand1B : bools) { final SVCallRecord call1 = new SVCallRecord("call1", "chr1", 1000, strand1A, - "chr1", 2001, strand1B, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, + "chr1", 2001, strand1B, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, Collections.emptyList(), null, Lists.newArrayList(GATKSVVCFConstants.DEPTH_ALGORITHM), Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); for (final Boolean strand2A : bools) { for (final Boolean strand2B : bools) { final SVCallRecord call2 = new SVCallRecord("call2", "chr1", 1000, strand2A, - "chr1", 2001, strand2B, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, + "chr1", 2001, strand2B, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, Collections.emptyList(), null, Lists.newArrayList(GATKSVVCFConstants.DEPTH_ALGORITHM), Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); // Should only cluster if strands match @@ -328,7 +352,7 @@ public void testClusterTogetherVaryContigs() { for (int j = i; j < contigs.size(); j++) { final String contig1B = contigs.get(j); final SVCallRecord call1 = new SVCallRecord("call1", contig1A, 1000, true, - contig1B, 2001, false, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, + contig1B, 2001, false, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, Collections.emptyList(), null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); for (int k = 0; k < contigs.size(); k++) { @@ -336,7 +360,7 @@ public void testClusterTogetherVaryContigs() { for (int m = k; m < contigs.size(); m++) { final String contig2B = contigs.get(m); final SVCallRecord call2 = new SVCallRecord("call2", contig2A, 1000, true, - contig2B, 2001, false, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, + contig2B, 2001, false, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, Collections.emptyList(), null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); // Should only cluster if contigs match @@ -356,11 +380,11 @@ public void testClusterTogetherVaryAlgorithms() { ); for (final List algorithms1 : algorithmsList) { final SVCallRecord call1 = new SVCallRecord("call1", "chr1", 1000, true, - "chr1", 2001, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + "chr1", 2001, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), 1002, algorithms1, Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); for (final List algorithms2 : algorithmsList) { final SVCallRecord call2 = new SVCallRecord("call2", "chr1", 1000, true, - "chr1", 2001, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + "chr1", 2001, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), 1002, algorithms2, Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); // All combinations should cluster Assert.assertTrue(engine.getLinkage().areClusterable(call1, call2)); @@ -400,15 +424,193 @@ public void testClusterTogetherCNVs() { Assert.assertFalse(engine.getLinkage().areClusterable(del1, dup1)); } + @DataProvider(name = "testClusterTogetherIntervaledComplexData") + public Object[][] testClusterTogetherIntervaledComplexData() { + return new Object[][]{ + // exact match + {"chr1", 1000, "chr1", 2000, + GATKSVVCFConstants.ComplexVariantSubtype.delINV, + Arrays.asList( + new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, new SimpleInterval("chr1", 1100, 1500)), + new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.INV, new SimpleInterval("chr1", 1600, 1900)) + ), + true + }, + // match within parameters + {"chr1", 1100, "chr1", 1900, + GATKSVVCFConstants.ComplexVariantSubtype.delINV, + Arrays.asList( + new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, new SimpleInterval("chr1", 1150, 1550)), + new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.INV, new SimpleInterval("chr1", 1650, 1800)) + ), + true + }, + // different contigs + {"chr2", 1000, "chr2", 2000, + GATKSVVCFConstants.ComplexVariantSubtype.delINV, + Arrays.asList( + new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, new SimpleInterval("chr1", 1100, 1500)), + new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.INV, new SimpleInterval("chr1", 1600, 1900)) + ), + false + }, + // different coordinates + {"chr1", 1600, "chr1", 2400, + GATKSVVCFConstants.ComplexVariantSubtype.delINV, + Arrays.asList( + new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, new SimpleInterval("chr1", 1100, 1500)), + new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.INV, new SimpleInterval("chr1", 1600, 1900)) + ), + false + }, + // different subtypes + {"chr1", 1000, "chr1", 2000, + GATKSVVCFConstants.ComplexVariantSubtype.delINVdel, + Arrays.asList( + new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, new SimpleInterval("chr1", 1100, 1500)), + new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.INV, new SimpleInterval("chr1", 1600, 1900)) + ), + false + }, + // different number of intervals + {"chr1", 1000, "chr1", 2000, + GATKSVVCFConstants.ComplexVariantSubtype.delINV, + Arrays.asList( + new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, new SimpleInterval("chr1", 1100, 1500)) + ), + false + }, + // different cpx intervals + {"chr1", 1000, "chr1", 2000, + GATKSVVCFConstants.ComplexVariantSubtype.delINV, + Arrays.asList( + new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, new SimpleInterval("chr1", 800, 1100)), + new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.INV, new SimpleInterval("chr1", 1600, 1900)) + ), + false + }, + // second cpx interval type is different + {"chr1", 1000, "chr1", 2000, + GATKSVVCFConstants.ComplexVariantSubtype.delINV, + Arrays.asList( + new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, new SimpleInterval("chr1", 1100, 1500)), + new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.DUP, new SimpleInterval("chr1", 1600, 1900)) + ), + false + }, + }; + } + + @Test(dataProvider= "testClusterTogetherIntervaledComplexData") + public void testClusterTogetherIntervaledComplex(final String contigA, final int posA, final String contigB, final int posB, + final GATKSVVCFConstants.ComplexVariantSubtype subtype, + final List cpxIntervals, final boolean expected) { + final SVCallRecord cpx1 = new SVCallRecord("cpx1", "chr1", 1000, null, + "chr1", 2000, null, + GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, + GATKSVVCFConstants.ComplexVariantSubtype.delINV, + Arrays.asList(SVCallRecord.ComplexEventInterval.decode("DEL_chr1:1100-1500", SVTestUtils.hg38Dict), SVCallRecord.ComplexEventInterval.decode("INV_chr1:1600-1900", SVTestUtils.hg38Dict)), + 1000, Collections.singletonList(SVTestUtils.PESR_ALGORITHM), + Lists.newArrayList(Allele.REF_N, SVTestUtils.CPX_ALLELE), + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); + final Integer length2 = inferLength(contigA, posA, contigB, posB); + final SVCallRecord cpx2 = new SVCallRecord("cpx2", contigA, posA, null, + contigB, posB, null, + GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, + subtype, + cpxIntervals, + length2, Collections.singletonList(SVTestUtils.PESR_ALGORITHM), + Lists.newArrayList(Allele.REF_N, SVTestUtils.CPX_ALLELE), + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); + Assert.assertEquals(engine.getLinkage().areClusterable(cpx1, cpx2), expected); + } + + @DataProvider(name = "testClusterTogetherInsertedComplexData") + public Object[][] testClusterTogetherInsertedComplexData() { + return new Object[][]{ + // exact match + {"chr1", 1000, "chr1", 1000, + GATKSVVCFConstants.ComplexVariantSubtype.dDUP, + Arrays.asList( + new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.DUP, new SimpleInterval("chr1", 6000, 8000)) + ), + true + }, + // close match + {"chr1", 1010, "chr1", 1010, + GATKSVVCFConstants.ComplexVariantSubtype.dDUP, + Arrays.asList( + new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.DUP, new SimpleInterval("chr1", 6200, 7990)) + ), + true + }, + // not match by coordinates + {"chr1", 2000, "chr1", 3000, + GATKSVVCFConstants.ComplexVariantSubtype.dDUP, + Arrays.asList( + new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.DUP, new SimpleInterval("chr1", 6200, 7990)) + ), + false + }, + // not match by subtype + {"chr1", 1000, "chr1", 1000, + GATKSVVCFConstants.ComplexVariantSubtype.dDUP_iDEL, + Arrays.asList( + new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.DUP, new SimpleInterval("chr1", 6000, 8000)) + ), + false + }, + // not match by cpx interval + {"chr1", 1000, "chr1", 1000, + GATKSVVCFConstants.ComplexVariantSubtype.dDUP, + Arrays.asList( + new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.DUP, new SimpleInterval("chr1", 9000, 11000)) + ), + false + }, + // different cpx interval type + {"chr1", 1000, "chr1", 1000, + GATKSVVCFConstants.ComplexVariantSubtype.dDUP, + Arrays.asList( + new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.INV, new SimpleInterval("chr1", 6000, 8000)) + ), + false + }, + }; + } + + @Test(dataProvider= "testClusterTogetherInsertedComplexData") + public void testClusterTogetherInsertedComplex(final String contigA, final int posA, final String contigB, final int posB, + final GATKSVVCFConstants.ComplexVariantSubtype subtype, + final List cpxIntervals, final boolean expected) { + final SVCallRecord cpx1 = new SVCallRecord("cpx1", "chr1", 1000, null, + "chr1", 1000, null, + GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, + GATKSVVCFConstants.ComplexVariantSubtype.dDUP, + Arrays.asList(new SVCallRecord.ComplexEventInterval(GATKSVVCFConstants.StructuralVariantAnnotationType.DUP, new SimpleInterval("chr1", 6000, 8000))), + 2000, Collections.singletonList(SVTestUtils.PESR_ALGORITHM), + Lists.newArrayList(Allele.REF_N, SVTestUtils.CPX_ALLELE), + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); + final Integer length2 = cpxIntervals.get(0).getInterval().size(); + final SVCallRecord cpx2 = new SVCallRecord("cpx2", contigA, posA, null, + contigB, posB, null, + GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, + subtype, + cpxIntervals, + length2, Collections.singletonList(SVTestUtils.PESR_ALGORITHM), + Lists.newArrayList(Allele.REF_N, SVTestUtils.CPX_ALLELE), + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); + Assert.assertEquals(engine.getLinkage().areClusterable(cpx1, cpx2), expected); + } @Test public void testClusterTogetherVaryParameters() { final SVClusterEngine testEngine1 = SVTestUtils.getNewDefaultSingleLinkageEngine(); final SVCallRecord call1 = new SVCallRecord("call1", "chr1", 1000, true, - "chr1", 2001, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + "chr1", 2001, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), 1002, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); final SVCallRecord call2 = new SVCallRecord("call2", "chr1", 1100, true, - "chr1", 2101, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + "chr1", 2101, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), 1002, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); // Cluster with default parameters Assert.assertTrue(testEngine1.getLinkage().areClusterable(call1, call2)); @@ -447,15 +649,15 @@ public void testAddVaryPositions(final int positionA1, final int positionB1, throw new TestException("Unimplemented clustering type " + type.name()); } final SVCallRecord call1 = new SVCallRecord("call1", "chr1", positionA1, true, - "chr1", positionB1, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + "chr1", positionB1, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), positionB1 - positionA1 + 1, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); final SVCallRecord call2 = new SVCallRecord("call1", "chr1", positionA2, true, - "chr1", positionB2, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + "chr1", positionB2, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), positionB2 - positionA2 + 1, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); final SVCallRecord call3 = new SVCallRecord("call1", "chr1", positionA3, true, - "chr1", positionB3, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, + "chr1", positionB3, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, Collections.emptyList(), positionB3 - positionA3 + 1, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); engine.add(call1); @@ -584,7 +786,7 @@ public void testGetCarrierSamplesBiallelic(final int ploidy, final Allele refAll .make()) .collect(Collectors.toList()); final SVCallRecord recordWithCopyNumber = new SVCallRecord("", "chr1", 1000, SVTestUtils.getValidTestStrandA(svtype), - "chr1", 1999, SVTestUtils.getValidTestStrandB(svtype), svtype, null, + "chr1", 1999, SVTestUtils.getValidTestStrandB(svtype), svtype, null, Collections.emptyList(), 1000, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), alleles, GenotypesContext.copy(genotypesWithCopyNumber), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); final Set resultWithCopyNumber = recordWithCopyNumber.getCarrierSampleSet(); @@ -599,7 +801,7 @@ public void testGetCarrierSamplesBiallelic(final int ploidy, final Allele refAll .make()) .collect(Collectors.toList()); final SVCallRecord recordWithGenotype = new SVCallRecord("", "chr1", 1000, SVTestUtils.getValidTestStrandA(svtype), - "chr1", 1999, SVTestUtils.getValidTestStrandB(svtype), svtype, null, + "chr1", 1999, SVTestUtils.getValidTestStrandB(svtype), svtype, null, Collections.emptyList(), 1000, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), alleles, GenotypesContext.copy(genotypesWithGenotype), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); final Set resultWithGenotype = recordWithGenotype.getCarrierSampleSet(); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVAnnotateEngineUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVAnnotateEngineUnitTest.java index dbcf2152b8f..50ab390bbd5 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVAnnotateEngineUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVAnnotateEngineUnitTest.java @@ -245,11 +245,11 @@ public void testAnnotatePointSVTypes( * @param intervals - list of intervals * @return - list of SV segments with provided SV type, one for each interval */ - private List createListOfSVSegments(final GATKSVVCFConstants.StructuralVariantAnnotationType svType, - final SimpleInterval[] intervals) { - final List segments = new ArrayList<>(intervals.length); + private List createListOfSVSegments(final GATKSVVCFConstants.StructuralVariantAnnotationType svType, + final SimpleInterval[] intervals) { + final List segments = new ArrayList<>(intervals.length); for (final SimpleInterval interval : intervals) { - segments.add(new SVAnnotateEngine.SVSegment(svType, interval)); + segments.add(new SVSegment(svType, interval)); } return segments; } @@ -260,12 +260,12 @@ private List createListOfSVSegments(final GATKSVVCFC * @param intervals - list of intervals * @return - list of SV segments */ - private List createListOfSVSegmentsDifferentTypes(final GATKSVVCFConstants.StructuralVariantAnnotationType[] svTypes, - final SimpleInterval[] intervals) { + private List createListOfSVSegmentsDifferentTypes(final GATKSVVCFConstants.StructuralVariantAnnotationType[] svTypes, + final SimpleInterval[] intervals) { Assert.assertEquals(svTypes.length, intervals.length); - final List segments = new ArrayList<>(intervals.length); + final List segments = new ArrayList<>(intervals.length); for (int i = 0; i < svTypes.length; i++) { - segments.add(new SVAnnotateEngine.SVSegment(svTypes[i], intervals[i])); + segments.add(new SVSegment(svTypes[i], intervals[i])); } return segments; } @@ -352,10 +352,10 @@ public Object[][] getComplexVariantIntervalsTestData() { public void testGetSegmentsFromComplexIntervals( final GATKSVVCFConstants.ComplexVariantSubtype complexType, final String cpxIntervalsString, - final List expectedSVSegments + final List expectedSVSegments ) { - final List cpxIntervals = SVAnnotateEngine.parseComplexIntervals(Arrays.asList(cpxIntervalsString.split(","))); - final List actualSegments = SVAnnotateEngine.getComplexAnnotationIntervals(cpxIntervals, + final List cpxIntervals = SVAnnotateEngine.parseComplexIntervals(Arrays.asList(cpxIntervalsString.split(","))); + final List actualSegments = SVAnnotateEngine.getComplexAnnotationIntervals(cpxIntervals, complexType); assertSegmentListEqual(actualSegments, expectedSVSegments); } @@ -389,9 +389,9 @@ public void testAnnotateComplexEvents( SVAnnotateEngine svAnnotateEngine = new SVAnnotateEngine(gtfTrees, null, sequenceDictionary, -1); - final List cpxIntervals = SVAnnotateEngine.parseComplexIntervals(Arrays.asList(cpxIntervalsString.split(","))); - final List cpxSegments = SVAnnotateEngine.getComplexAnnotationIntervals(cpxIntervals, complexType); - for (final SVAnnotateEngine.SVSegment cpxSegment: cpxSegments) { + final List cpxIntervals = SVAnnotateEngine.parseComplexIntervals(Arrays.asList(cpxIntervalsString.split(","))); + final List cpxSegments = SVAnnotateEngine.getComplexAnnotationIntervals(cpxIntervals, complexType); + for (final SVSegment cpxSegment: cpxSegments) { svAnnotateEngine.annotateGeneOverlaps(cpxSegment.getInterval(), cpxSegment.getIntervalSVType(), true, variantConsequenceDict); } @@ -503,15 +503,15 @@ public void testIncludesDispersedDuplication( * @param segmentsA - first list of SVSegments to compare * @param segmentsB - second list of SVSegments to compare */ - private void assertSegmentListEqual(final List segmentsA, - final List segmentsB) { + private void assertSegmentListEqual(final List segmentsA, + final List segmentsB) { final int lengthA = segmentsA.size(); if (lengthA != segmentsB.size()) { Assert.fail("Segment lists differ in length"); } for (int i = 0; i < lengthA; i++) { - SVAnnotateEngine.SVSegment segmentA = segmentsA.get(i); - SVAnnotateEngine.SVSegment segmentB = segmentsB.get(i); + SVSegment segmentA = segmentsA.get(i); + SVSegment segmentB = segmentsB.get(i); if (!segmentA.equals(segmentB)) { Assert.fail("Segment items differ"); } @@ -574,14 +574,14 @@ public Object[][] getSVTypesAndSegmentsTestData() { "G]chr19:424309]", null, null,"CTX_PP/QQ", null), GATKSVVCFConstants.ComplexVariantSubtype.CTX_PP_QQ, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, - Arrays.asList(new SVAnnotateEngine.SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, + Arrays.asList(new SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, new SimpleInterval("chr2", 86263976, 86263976))), null}, { createVariantContext("chr2", 86263977, 86263977, null, 424309, "A", "[chr19:424310[A", -1, null, "CTX_PP/QQ", null), GATKSVVCFConstants.ComplexVariantSubtype.CTX_PP_QQ, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, - Arrays.asList(new SVAnnotateEngine.SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, + Arrays.asList(new SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, new SimpleInterval("chr2", 86263977, 86263977))), null }, { createVariantContext("chr4", 21923233, 22506191, "chr8", 107912008, "N", @@ -589,36 +589,36 @@ public Object[][] getSVTypesAndSegmentsTestData() { Collections.singletonList("INV_chr4:21923233-22506191")), GATKSVVCFConstants.ComplexVariantSubtype.CTX_INV, GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, - Arrays.asList(new SVAnnotateEngine.SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.INV, + Arrays.asList(new SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.INV, new SimpleInterval("chr4", 21923233, 22506191)), - new SVAnnotateEngine.SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, + new SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, new SimpleInterval("chr4", 21923233, 21923233)), - new SVAnnotateEngine.SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, + new SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, new SimpleInterval("chr4", 22506191, 22506191)), - new SVAnnotateEngine.SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, + new SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, new SimpleInterval("chr8", 107912008, 107912008)), - new SVAnnotateEngine.SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, + new SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, new SimpleInterval("chr8", 107912009, 107912009)) ), null }, { createVariantContext("chr2", 205522308, 205522384, "chr2", null, "N", "", 76, null, null, null), null, GATKSVVCFConstants.StructuralVariantAnnotationType.INV, - Arrays.asList(new SVAnnotateEngine.SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.INV, + Arrays.asList(new SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.INV, new SimpleInterval("chr2", 205522308, 205522384))), null }, { createVariantContext("chr19", 424309, 424309, null, 424309, "T", "T]chr2:86263976]", null, null, "CTX_PP/QQ", null), GATKSVVCFConstants.ComplexVariantSubtype.CTX_PP_QQ, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, - Arrays.asList(new SVAnnotateEngine.SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, + Arrays.asList(new SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, new SimpleInterval("chr19", 424309, 424309))), null }, { createVariantContext("chr19", 424310, 424310, null, 424309, "C", "[chr2:86263977[C", null, null, "CTX_PP/QQ", null), GATKSVVCFConstants.ComplexVariantSubtype.CTX_PP_QQ, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, - Arrays.asList(new SVAnnotateEngine.SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, + Arrays.asList(new SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, new SimpleInterval("chr19", 424310, 424310))), null }, { createVariantContext("chr22", 10510000, 10694100, "chr22", null, "N", @@ -695,9 +695,9 @@ public Object[][] getSVTypesAndSegmentsTestData() { GATKSVVCFConstants.ComplexVariantSubtype.dDUP, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, Arrays.asList( - new SVAnnotateEngine.SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.DUP, + new SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.DUP, new SimpleInterval("chr22", 20267228, 20267614)), - new SVAnnotateEngine.SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.INS, + new SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.INS, new SimpleInterval("chr22", 18971159, 18971160))), null }, { createVariantContext("chr22", 22120897, 22120897, "chrX", 126356858, "N", @@ -736,9 +736,9 @@ public Object[][] getSVTypesAndSegmentsTestData() { GATKSVVCFConstants.ComplexVariantSubtype.dupINV, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, Arrays.asList( - new SVAnnotateEngine.SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.DUP, + new SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.DUP, new SimpleInterval("chr22", 36533058, 36533299)), - new SVAnnotateEngine.SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.INV, + new SVSegment(GATKSVVCFConstants.StructuralVariantAnnotationType.INV, new SimpleInterval("chr22", 36533299, 36538234))), null } }; @@ -750,17 +750,17 @@ public void testGetSVTypeAndSegments( final VariantContext variant, final GATKSVVCFConstants.ComplexVariantSubtype complexType, final GATKSVVCFConstants.StructuralVariantAnnotationType expectedSVType, - final List expectedSVSegments, - final List expectedSVSegmentsWithBNDOverlap + final List expectedSVSegments, + final List expectedSVSegmentsWithBNDOverlap ) { GATKSVVCFConstants.StructuralVariantAnnotationType actualSVType = SVAnnotateEngine.getSVType(variant); Assert.assertEquals(actualSVType, expectedSVType); - final List actualSegments = SVAnnotateEngine.getSVSegments(variant, + final List actualSegments = SVAnnotateEngine.getSVSegments(variant, actualSVType, -1, complexType); assertSegmentListEqual(actualSegments, expectedSVSegments); - final List actualSegmentsWithBNDOverlap = SVAnnotateEngine.getSVSegments(variant, + final List actualSegmentsWithBNDOverlap = SVAnnotateEngine.getSVSegments(variant, actualSVType, 15000, complexType); assertSegmentListEqual(actualSegmentsWithBNDOverlap, expectedSVSegmentsWithBNDOverlap != null ? expectedSVSegmentsWithBNDOverlap : expectedSVSegments); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVClusterIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVClusterIntegrationTest.java index 3265e9ecc6d..d79f8b49588 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVClusterIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVClusterIntegrationTest.java @@ -13,6 +13,7 @@ import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants; import org.broadinstitute.hellbender.tools.sv.SVCallRecord; import org.broadinstitute.hellbender.tools.sv.SVCallRecordUtils; +import org.broadinstitute.hellbender.tools.sv.SVTestUtils; import org.broadinstitute.hellbender.tools.sv.cluster.*; import org.broadinstitute.hellbender.utils.IntervalUtils; import org.broadinstitute.hellbender.utils.reference.ReferenceUtils; @@ -295,7 +296,7 @@ public void testAgainstSimpleImplementation() { vcfInputFilenames.stream() .flatMap(vcfFilename -> VariantContextTestUtils.readEntireVCFIntoMemory(getToolTestDataDir() + vcfFilename).getValue().stream()) .sorted(IntervalUtils.getDictionaryOrderComparator(referenceSequenceFile.getSequenceDictionary())) - .map(SVCallRecordUtils::create) + .map(v -> SVCallRecordUtils.create(v, SVTestUtils.hg38Dict)) .forEach(engine::add); final Comparator recordComparator = SVCallRecordUtils.getCallComparator(referenceSequenceFile.getSequenceDictionary()); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordanceIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordanceIntegrationTest.java index 6f4157724d4..be35bdfdeb7 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordanceIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordanceIntegrationTest.java @@ -79,9 +79,9 @@ public void testRefPanel() { final SVConcordanceAnnotator annotator = new SVConcordanceAnnotator(); final List inputEvalVariants = VariantContextTestUtils.readEntireVCFIntoMemory(evalVcfPath).getValue() - .stream().map(SVCallRecordUtils::create).collect(Collectors.toList()); + .stream().map(v -> SVCallRecordUtils.create(v, SVTestUtils.hg38Dict)).collect(Collectors.toList()); final List inputTruthVariants = VariantContextTestUtils.readEntireVCFIntoMemory(truthVcfPath).getValue() - .stream().map(SVCallRecordUtils::create).collect(Collectors.toList()); + .stream().map(v -> SVCallRecordUtils.create(v, SVTestUtils.hg38Dict)).collect(Collectors.toList()); final ClosestSVFinder finder = new ClosestSVFinder(linkage, annotator::annotate, dictionary); final List expectedRecords = new ArrayList<>(inputEvalVariants.size()); @@ -346,7 +346,7 @@ public void testSitesOnly() { final Pair> outputVcf = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()); final List inputEvalVariants = VariantContextTestUtils.readEntireVCFIntoMemory(evalVcfPath).getValue() - .stream().map(SVCallRecordUtils::create).collect(Collectors.toList()); + .stream().map(v -> SVCallRecordUtils.create(v, SVTestUtils.hg38Dict)).collect(Collectors.toList()); Assert.assertEquals(outputVcf.getValue().size(), inputEvalVariants.size()); final long numTruePositives = outputVcf.getValue().stream().filter(v -> v.getAttributeAsString(Concordance.TRUTH_STATUS_VCF_ATTRIBUTE, "").equals(ConcordanceState.TRUE_POSITIVE.getAbbreviation())).count(); Assert.assertEquals((int) numTruePositives, 1104);