Skip to content

Commit

Permalink
cleaning up, base level modes done, fixing integration and unit test …
Browse files Browse the repository at this point in the history
…-- need intel build
  • Loading branch information
orlicohen committed Aug 11, 2022
1 parent d6d47dc commit 164bad3
Show file tree
Hide file tree
Showing 23 changed files with 2,562 additions and 74 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -280,13 +280,14 @@ private void runFullAlignment(ReferencePair refPair, ReferenceSequenceTable tabl
MummerExecutor executor = new MummerExecutor();
logger.info("Running mummer alignment on sequence " + sequenceName);
File tempSnpsDirectory = IOUtils.createTempDir("tempsnps");
File mummerOutput = executor.executeMummer(ref1Fasta.toPath().toFile(), ref2Fasta.toPath().toFile(), tempSnpsDirectory, sequenceName);
File mummerOutput = executor.executeMummer(ref1Fasta.toPath().toFile(), ref2Fasta.toPath().toFile(), tempSnpsDirectory);
logger.info("Finished running mummer alignment on sequence " + sequenceName);
snpsFiles.add(mummerOutput);
}
}
// merge individual snps files
File snps = new File(baseComparisonOutputDirectory.toPath().toString(), String.format("%s_%s.snps", refPair.getRef1AsString(), refPair.getRef2AsString()));
File snps = IOUtils.createTempFile(String.format("%s_%s", refPair.getRef1AsString(), refPair.getRef2AsString()), ".snps");
/*new File(baseComparisonOutputDirectory.toPath().toString(), String.format("%s_%s.snps", refPair.getRef1AsString(), refPair.getRef2AsString()));*/
try (PrintWriter writer = new PrintWriter(snps)) {
for (File file : snpsFiles) {
try (XReadLines reader = new XReadLines(file)) {
Expand All @@ -312,7 +313,7 @@ private void runFullAlignment(ReferencePair refPair, ReferenceSequenceTable tabl
int previousPos = -1;
for (String line : reader) {
String[] fields = line.split("\\t", -1);
String contig = fields[10];
String contig = fields[12];
int pos = Integer.valueOf(fields[0]);
String ref = fields[1];
String alt = fields[2];
Expand Down Expand Up @@ -532,7 +533,6 @@ private static class MummerIndel{
}

public VariantContext getAsVCFRecord(){
// builder, add fields
VariantContextBuilder vcfBuilder = new VariantContextBuilder();
int stopPos = isInsertion ? pos : pos + ref.length()-1;
VariantContext record = vcfBuilder.chr(chr).start(pos).stop(stopPos).alleles(ref, alt).make();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ public MummerExecutor(File mummerExecutableDirectory){
this.mummerExecutableDirectory = mummerExecutableDirectory;
}


/**
* Returns a MummerExecutor pointing to the unzipped MUMmer executables packaged in GATK
*/
Expand All @@ -63,14 +62,14 @@ public File getMummerExecutableDirectory() {
* @param outputDirectory directory to output final snps file
* @return the final snps File
*/
public File executeMummer(File fasta1, File fasta2, File outputDirectory, String sequenceName){
public File executeMummer(File fasta1, File fasta2, File outputDirectory){

// NUCMER
logger.debug("Running nucmer.");
File nucmerTempDirectory = IOUtils.createTempDir("nucmerTempDir");
File deltaFile = new File(nucmerTempDirectory, "deltaFile"); // delta file for nucmer output --> input to delta-filter
String[] nucmerArgs = {mummerExecutableDirectory.getAbsolutePath() + "/nucmer", "--mum", "-p", deltaFile.getAbsolutePath(), fasta1.getAbsolutePath(), fasta2.getAbsolutePath()};
ProcessOutput nucmer = runShellCommand(nucmerArgs, null, null,false);
ProcessOutput nucmer = runShellCommand(nucmerArgs, null, null,true);

// DELTA-FILTER
logger.debug("Running delta-filter.");
Expand All @@ -80,10 +79,11 @@ public File executeMummer(File fasta1, File fasta2, File outputDirectory, String

// SHOW-SNPS
logger.debug("Running show-snps.");
File showSNPSOutput = new File(outputDirectory, String.format("chr%s_snps_output.snps", sequenceName));
File showSNPSOutput = new File(outputDirectory, "snps_output.snps");
String[] showSNPsArgs = {mummerExecutableDirectory.getAbsolutePath() + "/show-snps", "-rlTH", deltaFilterOutput.getAbsolutePath()};
ProcessOutput showSNPs = runShellCommand(showSNPsArgs, null, showSNPSOutput, false);


return showSNPSOutput;
}

Expand Down Expand Up @@ -143,7 +143,7 @@ public static ProcessOutput runPythonCommand(String script, List<String> scriptA
return output;
}

// method to locate the MUMmer binaries packaged within GATK
// method to unzip and locate the MUMmer binaries packaged within GATK
private File prepareMUMmerExecutionDirectory(){
try{
Resource mummerZipFile = new Resource(MUMMER_BINARIES_ZIPFILE, getClass());
Expand All @@ -160,5 +160,4 @@ private File prepareMUMmerExecutionDirectory(){
throw new UserException("Unable to unzip MUMmer binaries.", e);
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -56,19 +56,6 @@ public void testCompareReferencesMissingValue() throws IOException{
IntegrationTestSpec.assertEqualTextFiles(output, expectedOutput);
}

// test run on hg38 DRAGEN fastas -- displays EXACT_MATCH
@Test
public void testCompareReferencesDRAGENFastas() throws IOException{
final File ref1 = new File("/Users/ocohen/workingcode/gatk/tempreferences/hg38_better_alt_masked.fa");
final File ref2 = new File("/Users/ocohen/workingcode/gatk/tempreferences/Homo_sapiens_assembly38_masked.fasta");
final File output = createTempFile("testCompareReferencesDRAGENMaskedMatch", ".table");
final File expectedOutput = new File(getToolTestDataDir(), "expected.testCompareReferencesMaskedDRAGENMatch.table");

final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-O", output.getAbsolutePath()};
runCommandLine(args);
IntegrationTestSpec.assertEqualTextFiles(output, expectedOutput);
}

@DataProvider(name = "md5ArgumentData")
public Object[][] md5ArgumentData() {
return new Object[][]{
Expand Down Expand Up @@ -122,7 +109,7 @@ public void testCompareReferencesToStdOutput() throws IOException{
runCommandLine(args);
}

// The following tests run the tool on different combinations of reference files
// The following test runs the tool on different combinations of reference files
// and produces output to stdout for the sake of manually inspecting outputs.
// Disabled, as no actual assertions made.
@Test(enabled = false)
Expand All @@ -132,18 +119,19 @@ public void testCompareReferencesMultipleReferencesStdOut() throws IOException{
final File ref3 = new File(getToolTestDataDir() + "hg19mini_chr2snp.fasta");
final File ref4 = new File(getToolTestDataDir() + "hg19mini_missingchr1.fasta");

final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-refcomp", ref3.getAbsolutePath(),
"-refcomp", ref4.getAbsolutePath()};
final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref3.getAbsolutePath(), "-refcomp", ref3.getAbsolutePath(),
"-refcomp", ref4.getAbsolutePath(), "-display-sequences-by-name", "-display-only-differing-sequences"};
runCommandLine(args);
}

// FIND_SNPS_ONLY tests:
@Test
public void testFindMultipleSNPs() throws IOException{
public void testFindSNPsMultipleSNPs() throws IOException{
final File ref1 = new File(getToolTestDataDir() + "hg19mini.fasta");
final File ref2 = new File(getToolTestDataDir() + "hg19mini_chr2multiplesnps.fasta");
final File output = IOUtils.createTempDir("tempFindSNPs");

final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FIND_SNPS_ONLY", "-base-comparison-output", output.toPath().toString()};
final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FIND_SNPS_ONLY", "-base-comparison-output", output.getAbsolutePath()};
runCommandLine(args);

final File actualOutput = new File(output, "hg19mini.fasta_hg19mini_chr2multiplesnps.fasta_snps.tsv");
Expand All @@ -152,7 +140,7 @@ public void testFindMultipleSNPs() throws IOException{
}

@Test
public void testFindIUPACSNPs() throws IOException{
public void testFindSNPsIUPACBases() throws IOException{
final File ref1 = new File(getToolTestDataDir() + "hg19mini.fasta");
final File ref2 = new File(getToolTestDataDir() + "hg19mini_chr2iupacsnps.fasta");
final File output = IOUtils.createTempDir("tempFindSNPs");
Expand All @@ -166,6 +154,7 @@ public void testFindIUPACSNPs() throws IOException{
IntegrationTestSpec.assertEqualTextFiles(actualOutput, expectedOutput);
}

// FULL_ALIGNMENT tests:
@Test
public void testFullAlignmentModeMultipleSNPs() throws IOException{
final File ref1 = new File(getToolTestDataDir() + "hg19mini.fasta");
Expand All @@ -184,14 +173,14 @@ public void testFullAlignmentModeMultipleSNPs() throws IOException{
public void testFullAlignmentModeDeletion() throws IOException{
final File ref1 = new File(getToolTestDataDir() + "hg19mini.fasta");
final File ref2 = new File(getToolTestDataDir() + "hg19mini_chr1indel.fasta");
//final File output = IOUtils.createTempDir("tempFullAlignmentIndel");
final File output = IOUtils.createTempDir("tempFullAlignmentIndel");

final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FULL_ALIGNMENT", "-base-comparison-output", getToolTestDataDir()};
final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FULL_ALIGNMENT", "-base-comparison-output", output.getAbsolutePath()};
runCommandLine(args);

/*final File expectedOutput = new File(getToolTestDataDir(), "expected.testDeletion.hg19mini.fasta_hg19mini_chr1indel.fasta.snps");
final File actualOutput = new File(output, "hg19mini.fasta_hg19mini_chr1indel.fasta.snps");
IntegrationTestSpec.assertEqualTextFiles(actualOutput, expectedOutput);*/
final File expectedOutput = new File(getToolTestDataDir(), "expected.testDeletion.hg19mini.fasta_hg19mini_chr1indel.fasta.vcf");
final File actualOutput = new File(output, "hg19mini.fasta_hg19mini_chr1indel.fasta.vcf");
IntegrationTestSpec.assertEqualTextFiles(actualOutput, expectedOutput);
}

@Test
Expand All @@ -203,8 +192,8 @@ public void testFullAlignmentModeInsertion() throws IOException{
final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FULL_ALIGNMENT", "-base-comparison-output", output.getAbsolutePath()};
runCommandLine(args);

final File expectedOutput = new File(getToolTestDataDir(), "expected.testInsertion.hg19mini_chr1indel.fasta_hg19mini.fasta.snps");
final File actualOutput = new File(output, "hg19mini_chr1indel.fasta_hg19mini.fasta.snps");
final File expectedOutput = new File(getToolTestDataDir(), "expected.testInsertion.hg19mini_chr1indel.fasta_hg19mini.fasta.vcf");
final File actualOutput = new File(output, "hg19mini_chr1indel.fasta_hg19mini.fasta.vcf");
IntegrationTestSpec.assertEqualTextFiles(actualOutput, expectedOutput);
}

Expand All @@ -214,11 +203,25 @@ public void testFullAlignmentSNPsOnMultipleContigs() throws IOException{
final File ref2 = new File(getToolTestDataDir() + "hg19mini_snpsmultiplecontigs.fasta");
final File output = IOUtils.createTempDir("tempFullAlignmentSNPsMultipleContigs");

final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FULL_ALIGNMENT", "-base-comparison-output", output.toPath().toString()};
final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FULL_ALIGNMENT", "-base-comparison-output", output.getAbsolutePath()};
runCommandLine(args);

final File expectedOutput = new File(getToolTestDataDir(), "expected.hg19mini.fasta_hg19mini_snpsmultiplecontigs.fasta.vcf");
final File actualOutput = new File(output, "hg19mini.fasta_hg19mini_snpsmultiplecontigs.fasta.vcf");
IntegrationTestSpec.assertEqualTextFiles(actualOutput, expectedOutput);
}

@Test
public void testFullAlignmentModeSNPAndINDEL() throws IOException{
final File ref1 = new File(getToolTestDataDir() + "hg19mini.fasta");
final File ref2 = new File(getToolTestDataDir() + "hg19mini_snpandindel.fasta");
final File output = IOUtils.createTempDir("tempFullAlignmentIndel");

final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FULL_ALIGNMENT", "-base-comparison-output", output.getAbsolutePath()};
runCommandLine(args);

final File expectedOutput = new File(getToolTestDataDir(), "expected.hg19mini.fasta_hg19mini_snpsmultiplecontigs.fasta.snps");
final File actualOutput = new File(output, "hg19mini.fasta_hg19mini_snpsmultiplecontigs.fasta.snps");
final File expectedOutput = new File(getToolTestDataDir(), "expected.SNPandINDEL.hg19mini.fasta_hg19mini_snpandindel.fasta.vcf");
final File actualOutput = new File(output, "hg19mini.fasta_hg19mini_snpandindel.fasta.vcf");
IntegrationTestSpec.assertEqualTextFiles(actualOutput, expectedOutput);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public class CompareReferencesUnitTest extends CommandLineProgramTest {
@Test
public void testGenerateFastaForSequence() throws IOException {
File ref = new File(getToolTestDataDir() + "hg19mini.fasta");
File expectedOutput = new File("/Users/ocohen/workingcode/gatk/tempreferences/1.fasta");
File expectedOutput = new File(getToolTestDataDir() + "1.fasta");
String sequenceName = "1";
File output = createTempFile("example_chr1", ".fasta");

Expand All @@ -26,33 +26,4 @@ public void testGenerateFastaForSequence() throws IOException {
IntegrationTestSpec.assertEqualTextFiles(output, expectedOutput);
}

@Test
public void testRunShellCommand(){
String[] command = {"echo", "hello"};

MummerExecutor.runShellCommand(command, null, new File("/Users/ocohen/workingcode/hello.output"), true);
}

/* @Test
public void testExecuteMummer() {
File fasta1 = new File(getToolTestDataDir() + "hg19mini.fasta");
File fasta2 = new File(getToolTestDataDir() + "hg19mini_chr2multiplesnps.fasta");
File outputDirectory = new File("/Users/ocohen/workingcode/gatk/tempreferences/");
MummerExecutor exec = new MummerExecutor();
exec.executeMummer(fasta1, fasta2, outputDirectory);
//File expectedOutput = new File(outputDirectory, "snps_output.snps");
}*/

@Test
public void testPrepareMUMmerExecutionDirectory(){
MummerExecutor exec = new MummerExecutor();
File executableDirectory = exec.getMummerExecutableDirectory();
Assert.assertEquals(executableDirectory.listFiles().length, 4);

for(File file : executableDirectory.listFiles()){
Assert.assertTrue(file.getTotalSpace() > 0);
Assert.assertTrue(file.canExecute());
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package org.broadinstitute.hellbender.tools.reference;
import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.testutils.IntegrationTestSpec;
import org.broadinstitute.hellbender.utils.alignment.MummerExecutor;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.testng.Assert;
import org.testng.annotations.Test;

import java.io.File;
import java.io.IOException;

/**
 * Unit tests for {@link MummerExecutor}: running the MUMmer pipeline on two small
 * FASTA files and checking the unpacked MUMmer executable directory.
 */
public class MummerExecutorUnitTest extends CommandLineProgramTest {

    // Test FASTAs are shared with the CompareReferences tool tests rather than duplicated here.
    private final String COMPARE_REFERENCES_TEST_FILES_DIRECTORY = toolsTestDir + "/reference/CompareReferences/";

    /**
     * Runs {@link MummerExecutor#executeMummer} on two test FASTAs and compares the
     * resulting {@code snps_output.snps} file against the expected output.
     *
     * @throws IOException if the expected and actual files cannot be read for comparison
     */
    // need intel build for MUMmer
    @Test(enabled = false)
    public void testExecuteMummer() throws IOException {
        final File fasta1 = new File(COMPARE_REFERENCES_TEST_FILES_DIRECTORY + "hg19mini.fasta");
        final File fasta2 = new File(COMPARE_REFERENCES_TEST_FILES_DIRECTORY + "hg19mini_chr2multiplesnps.fasta");
        final File expectedOutputDir = new File(getToolTestDataDir());
        final File actualOutputDir = IOUtils.createTempDir("testMummer");
        final MummerExecutor exec = new MummerExecutor();

        exec.executeMummer(fasta1, fasta2, actualOutputDir);

        final File expectedOutput = new File(expectedOutputDir, "expected.snps_output.snps");
        final File actualOutput = new File(actualOutputDir, "snps_output.snps");
        IntegrationTestSpec.assertEqualTextFiles(actualOutput, expectedOutput);
    }

    /**
     * Checks that the executable directory of a default-constructed {@link MummerExecutor}
     * contains exactly four entries, each non-empty and executable.
     */
    @Test
    public void testPrepareMUMmerExecutionDirectory(){
        final MummerExecutor exec = new MummerExecutor();
        final File executableDirectory = exec.getMummerExecutableDirectory();

        // listFiles() returns null when the path is not a directory or cannot be read;
        // fail with a clear message instead of a NullPointerException.
        final File[] executables = executableDirectory.listFiles();
        Assert.assertNotNull(executables, "Unable to list MUMmer executable directory: " + executableDirectory);
        Assert.assertEquals(executables.length, 4);

        for (final File file : executables) {
            // File.length() is the size of the file itself; File.getTotalSpace() reports the
            // size of the containing partition and would pass even for an empty file.
            Assert.assertTrue(file.length() > 0, "Empty file in MUMmer executable directory: " + file);
            Assert.assertTrue(file.canExecute(), "File is not executable: " + file);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
>1 dna:chromosome chromosome:GRCh37:1:1:16000:1
>1
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
##fileformat=VCFv4.2
##contig=<ID=1,length=16000>
##contig=<ID=2,length=16000>
##contig=<ID=3,length=16000>
##contig=<ID=4,length=16000>
#CHROM POS ID REF ALT QUAL FILTER INFO
1 12881 . G C . . .
2 10400 . AACCCCGAACCCCGAACCCCAACCCCAACCCCAACCCCAACCCTAACCCCTCACCCTCACCCTCGACCCCCGACCCCCGAC A . . .
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@
##contig=<ID=3,length=16000>
##contig=<ID=4,length=16000>
#CHROM POS ID REF ALT QUAL FILTER INFO
1 13368 . A T . . .
1 13770 . A C . . .
1 15953 . G C . . .
2 13368 . A T . . .
2 13770 . A C . . .
2 15953 . G C . . .
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

Sequence Name Position hg19mini.fasta hg19mini_chr2multiplesnps.fasta
2 13368 A T
2 13770 A C
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
##fileformat=VCFv4.2
##contig=<ID=1,length=16000>
##contig=<ID=2,length=16000>
##contig=<ID=3,length=16000>
##contig=<ID=4,length=16000>
#CHROM POS ID REF ALT QUAL FILTER INFO
1 12084 . A C . . .
2 12084 . A T . . .
4 14728 . T A . . .
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
##fileformat=VCFv4.2
##contig=<ID=1,length=16000>
##contig=<ID=2,length=16000>
##contig=<ID=3,length=16000>
##contig=<ID=4,length=16000>
#CHROM POS ID REF ALT QUAL FILTER INFO
1 14000 . AACCTGCCCCACAGCCTTGCCTGGATTTCTATCTCCCTGGCTTGGTGCCAGTTCCTCCAAGTCGATGGCACCTCCCTCCCT A . . .
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
##fileformat=VCFv4.2
##contig=<ID=1,length=15920>
##contig=<ID=2,length=16000>
##contig=<ID=3,length=16000>
##contig=<ID=4,length=16000>
#CHROM POS ID REF ALT QUAL FILTER INFO
1 13999 . T TACCTGCCCCACAGCCTTGCCTGGATTTCTATCTCCCTGGCTTGGTGCCAGTTCCTCCAAGTCGATGGCACCTCCCTCCCT . . .
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
>1 dna:chromosome chromosome:GRCh37:1:1:16000:1
>1 dna:chromosome chromosome:GRCh37:1:1:15920:1
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
Expand Down
Loading

0 comments on commit 164bad3

Please sign in to comment.