Support VariantContext as Parquet in cli package.
heuermh committed Aug 26, 2019
1 parent 574babe commit 6ba69f4
Showing 7 changed files with 70 additions and 24 deletions.
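All seven commands converge on the same load, pipe, save shape. The sketch below assembles that shape in one place from the hunks that follow; BcftoolsNormFn stands in for each command's pipe function, and args and stringency are the fields the surrounding command class already defines.

import htsjdk.samtools.ValidationStringency
import org.apache.spark.SparkContext
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.util.FileExtensions._

def run(sc: SparkContext) {
  // Load by extension: VCF (.vcf, .vcf.gz, .vcf.bgz) or Parquet otherwise.
  val variantContexts = sc.loadVariantContexts(args.inputPath)
  val piped = new BcftoolsNormFn(args, stringency, sc).apply(variantContexts)

  if (isVcfExt(args.outputPath)) {
    // The VCF branch honors the single-file merge flags.
    piped.saveAsVcf(
      args.outputPath,
      asSingleFile = args.asSingleFile,
      deferMerging = args.deferMerging,
      disableFastConcat = args.disableFastConcat,
      stringency)
  } else {
    // Anything else round-trips as Parquet-encoded variant contexts.
    piped.saveAsParquet(args)
  }
}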
6 changes: 2 additions & 4 deletions cli/src/main/scala/org/bdgenomics/cannoli/cli/BcftoolsMpileup.scala
@@ -46,7 +46,7 @@ class BcftoolsMpileupArgs extends BcftoolsMpileupFnArgs with ADAMSaveAnyArgs wit
   @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe alignment records from (e.g. .bam, .cram, .sam). If extension is not detected, Parquet is assumed.", index = 0)
   var inputPath: String = null
 
-  @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe genotypes to (e.g. .vcf, .vcf.gz, .vcf.bgz). If extension is not detected, Parquet is assumed.", index = 1)
+  @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe variant contexts to (e.g. .vcf, .vcf.gz, .vcf.bgz). If extension is not detected, Parquet is assumed.", index = 1)
   var outputPath: String = null
 
   @Args4jOption(required = false, name = "-single", usage = "Saves OUTPUT as single file.")
@@ -73,8 +73,6 @@ class BcftoolsMpileup(protected val args: BcftoolsMpileupArgs) extends BDGSparkC
   val stringency: ValidationStringency = ValidationStringency.valueOf(args.stringency)
 
   def run(sc: SparkContext) {
-    warn("inputPath " + args.inputPath + " outputPath " + args.outputPath)
-
     val alignments = sc.loadAlignments(args.inputPath, stringency = stringency)
     val variantContexts = new BcftoolsMpileupFn(args, stringency, sc).apply(alignments)
 
@@ -87,7 +85,7 @@ class BcftoolsMpileup(protected val args: BcftoolsMpileupArgs) extends BDGSparkC
         stringency
       )
     } else {
-      variantContexts.toGenotypes.saveAsParquet(args)
+      variantContexts.saveAsParquet(args)
     }
   }
 }
20 changes: 16 additions & 4 deletions cli/src/main/scala/org/bdgenomics/cannoli/cli/BcftoolsNorm.scala
@@ -22,6 +22,7 @@ import htsjdk.samtools.ValidationStringency
 import org.apache.spark.SparkContext
 import org.bdgenomics.adam.rdd.ADAMContext._
 import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs
+import org.bdgenomics.adam.util.FileExtensions._
 import org.bdgenomics.cannoli.{
   BcftoolsNorm => BcftoolsNormFn,
   BcftoolsNormArgs => BcftoolsNormFnArgs
@@ -42,10 +43,10 @@ object BcftoolsNorm extends BDGCommandCompanion {
  * Bcftools norm command line arguments.
  */
 class BcftoolsNormArgs extends BcftoolsNormFnArgs with ADAMSaveAnyArgs with ParquetArgs {
-  @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe from, in VCF format.", index = 0)
+  @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe variant contexts from (e.g. .vcf, .vcf.gz, .vcf.bgz). If extension is not detected, Parquet is assumed.", index = 0)
   var inputPath: String = null
 
-  @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe to, in VCF format.", index = 1)
+  @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe variant contexts to (e.g. .vcf, .vcf.gz, .vcf.bgz). If extension is not detected, Parquet is assumed.", index = 1)
   var outputPath: String = null
 
   @Args4jOption(required = false, name = "-single", usage = "Saves OUTPUT as single file.")
@@ -72,8 +73,19 @@ class BcftoolsNorm(protected val args: BcftoolsNormArgs) extends BDGSparkCommand
   val stringency: ValidationStringency = ValidationStringency.valueOf(args.stringency)
 
   def run(sc: SparkContext) {
-    val variantContexts = sc.loadVcf(args.inputPath, stringency = stringency)
+    val variantContexts = sc.loadVariantContexts(args.inputPath)
     val pipedVariantContexts = new BcftoolsNormFn(args, stringency, sc).apply(variantContexts)
-    pipedVariantContexts.saveAsVcf(args, stringency)
+
+    if (isVcfExt(args.outputPath)) {
+      pipedVariantContexts.saveAsVcf(
+        args.outputPath,
+        asSingleFile = args.asSingleFile,
+        deferMerging = args.deferMerging,
+        disableFastConcat = args.disableFastConcat,
+        stringency
+      )
+    } else {
+      pipedVariantContexts.saveAsParquet(args)
+    }
   }
 }
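Switching the load from loadVcf to loadVariantContexts is what opens the input side to Parquet: the usage strings above say the extension decides, with Parquet assumed when no VCF extension is detected. A small sketch of that symmetry, assuming sc is an active SparkContext and using hypothetical paths:

import org.bdgenomics.adam.rdd.ADAMContext._

// Both inputs go through the same entry point; the extension picks the reader.
val fromVcf = sc.loadVariantContexts("in.vcf.bgz")                   // VCF codec
val fromParquet = sc.loadVariantContexts("in.variant_contexts.adam") // Parquet assumed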
4 changes: 2 additions & 2 deletions cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala
@@ -43,7 +43,7 @@ class FreebayesArgs extends FreebayesFnArgs with ADAMSaveAnyArgs with ParquetArg
   @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe alignment records from (e.g. .bam, .cram, .sam). If extension is not detected, Parquet is assumed.", index = 0)
   var inputPath: String = null
 
-  @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe genotypes to (e.g. .vcf, .vcf.gz, .vcf.bgz). If extension is not detected, Parquet is assumed.", index = 1)
+  @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe variant contexts to (e.g. .vcf, .vcf.gz, .vcf.bgz). If extension is not detected, Parquet is assumed.", index = 1)
   var outputPath: String = null
 
   @Args4jOption(required = false, name = "-single", usage = "Saves OUTPUT as single file.")
@@ -82,7 +82,7 @@ class Freebayes(protected val args: FreebayesArgs) extends BDGSparkCommand[Freeb
         stringency
       )
     } else {
-      variantContexts.toGenotypes.saveAsParquet(args)
+      variantContexts.saveAsParquet(args)
     }
   }
 }
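Dropping toGenotypes changes what lands on disk on the Parquet branch: variant contexts are saved as-is instead of being flattened to genotypes first. A hedged read-back sketch, assuming ADAM's genotype-to-variant-context conversion and hypothetical paths:

import org.bdgenomics.adam.rdd.ADAMContext._

// New layout: variant contexts round-trip directly.
val variantContexts = sc.loadVariantContexts("freebayes.out.adam")

// Old layout: genotype Parquet had to be regrouped into variant contexts.
val regrouped = sc.loadGenotypes("freebayes.old.adam").toVariantContexts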
4 changes: 2 additions & 2 deletions cli/src/main/scala/org/bdgenomics/cannoli/cli/SamtoolsMpileup.scala
@@ -46,7 +46,7 @@ class SamtoolsMpileupArgs extends SamtoolsMpileupFnArgs with ADAMSaveAnyArgs wit
   @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe alignment records from (e.g. .bam, .cram, .sam). If extension is not detected, Parquet is assumed.", index = 0)
   var inputPath: String = null
 
-  @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe genotypes to (e.g. .vcf, .vcf.gz, .vcf.bgz). If extension is not detected, Parquet is assumed.", index = 1)
+  @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe variant contexts to (e.g. .vcf, .vcf.gz, .vcf.bgz). If extension is not detected, Parquet is assumed.", index = 1)
   var outputPath: String = null
 
   @Args4jOption(required = false, name = "-single", usage = "Saves OUTPUT as single file.")
@@ -85,7 +85,7 @@ class SamtoolsMpileup(protected val args: SamtoolsMpileupArgs) extends BDGSparkC
         stringency
       )
     } else {
-      variantContexts.toGenotypes.saveAsParquet(args)
+      variantContexts.saveAsParquet(args)
     }
   }
 }
20 changes: 16 additions & 4 deletions cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala
@@ -22,6 +22,7 @@ import htsjdk.samtools.ValidationStringency
 import org.apache.spark.SparkContext
 import org.bdgenomics.adam.rdd.ADAMContext._
 import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs
+import org.bdgenomics.adam.util.FileExtensions._
 import org.bdgenomics.cannoli.{ SnpEff => SnpEffFn, SnpEffArgs => SnpEffFnArgs }
 import org.bdgenomics.utils.cli._
 import org.kohsuke.args4j.{ Argument, Option => Args4jOption }
@@ -39,10 +40,10 @@ object SnpEff extends BDGCommandCompanion {
  * SnpEff command line arguments.
  */
 class SnpEffArgs extends SnpEffFnArgs with ADAMSaveAnyArgs with ParquetArgs {
-  @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe from, in VCF format.", index = 0)
+  @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe variant contexts from (e.g. .vcf, .vcf.gz, .vcf.bgz). If extension is not detected, Parquet is assumed.", index = 0)
   var inputPath: String = null
 
-  @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe to, in VCF format.", index = 1)
+  @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe variant contexts to (e.g. .vcf, .vcf.gz, .vcf.bgz). If extension is not detected, Parquet is assumed.", index = 1)
   var outputPath: String = null
 
   @Args4jOption(required = false, name = "-single", usage = "Saves OUTPUT as single file.")
@@ -69,8 +70,19 @@ class SnpEff(protected val args: SnpEffArgs) extends BDGSparkCommand[SnpEffArgs]
   val stringency: ValidationStringency = ValidationStringency.valueOf(args.stringency)
 
   def run(sc: SparkContext) {
-    val variantContexts = sc.loadVcf(args.inputPath, stringency = stringency)
+    val variantContexts = sc.loadVariantContexts(args.inputPath)
     val pipedVariantContexts = new SnpEffFn(args, stringency, sc).apply(variantContexts)
-    pipedVariantContexts.saveAsVcf(args, stringency)
+
+    if (isVcfExt(args.outputPath)) {
+      pipedVariantContexts.saveAsVcf(
+        args.outputPath,
+        asSingleFile = args.asSingleFile,
+        deferMerging = args.deferMerging,
+        disableFastConcat = args.disableFastConcat,
+        stringency
+      )
+    } else {
+      pipedVariantContexts.saveAsParquet(args)
+    }
   }
 }
20 changes: 16 additions & 4 deletions cli/src/main/scala/org/bdgenomics/cannoli/cli/Vep.scala
@@ -22,6 +22,7 @@ import htsjdk.samtools.ValidationStringency
 import org.apache.spark.SparkContext
 import org.bdgenomics.adam.rdd.ADAMContext._
 import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs
+import org.bdgenomics.adam.util.FileExtensions._
 import org.bdgenomics.cannoli.{ Vep => VepFn, VepArgs => VepFnArgs }
 import org.bdgenomics.utils.cli._
 import org.kohsuke.args4j.{ Argument, Option => Args4jOption }
@@ -39,10 +40,10 @@ object Vep extends BDGCommandCompanion {
  * Vep command line arguments.
  */
 class VepArgs extends VepFnArgs with ADAMSaveAnyArgs with ParquetArgs {
-  @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe from, in VCF format.", index = 0)
+  @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe variant contexts from (e.g. .vcf, .vcf.gz, .vcf.bgz). If extension is not detected, Parquet is assumed.", index = 0)
   var inputPath: String = null
 
-  @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe to, in VCF format.", index = 1)
+  @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe variant contexts to (e.g. .vcf, .vcf.gz, .vcf.bgz). If extension is not detected, Parquet is assumed.", index = 1)
   var outputPath: String = null
 
   @Args4jOption(required = false, name = "-single", usage = "Saves OUTPUT as single file.")
@@ -69,8 +70,19 @@ class Vep(protected val args: VepArgs) extends BDGSparkCommand[VepArgs] with Log
   val stringency: ValidationStringency = ValidationStringency.valueOf(args.stringency)
 
   def run(sc: SparkContext) {
-    val variantContexts = sc.loadVcf(args.inputPath, stringency = stringency)
+    val variantContexts = sc.loadVariantContexts(args.inputPath)
     val pipedVariantContexts = new VepFn(args, stringency, sc).apply(variantContexts)
-    pipedVariantContexts.saveAsVcf(args, stringency)
+
+    if (isVcfExt(args.outputPath)) {
+      pipedVariantContexts.saveAsVcf(
+        args.outputPath,
+        asSingleFile = args.asSingleFile,
+        deferMerging = args.deferMerging,
+        disableFastConcat = args.disableFastConcat,
+        stringency
+      )
+    } else {
+      pipedVariantContexts.saveAsParquet(args)
+    }
   }
 }
20 changes: 16 additions & 4 deletions cli/src/main/scala/org/bdgenomics/cannoli/cli/VtNormalize.scala
@@ -22,6 +22,7 @@ import htsjdk.samtools.ValidationStringency
 import org.apache.spark.SparkContext
 import org.bdgenomics.adam.rdd.ADAMContext._
 import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs
+import org.bdgenomics.adam.util.FileExtensions._
 import org.bdgenomics.cannoli.{
   VtNormalize => VtNormalizeFn,
   VtNormalizeArgs => VtNormalizeFnArgs
@@ -42,10 +43,10 @@ object VtNormalize extends BDGCommandCompanion {
  * Vt normalize command line arguments.
  */
 class VtNormalizeArgs extends VtNormalizeFnArgs with ADAMSaveAnyArgs with ParquetArgs {
-  @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe from, in VCF format.", index = 0)
+  @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe variant contexts from (e.g. .vcf, .vcf.gz, .vcf.bgz). If extension is not detected, Parquet is assumed.", index = 0)
   var inputPath: String = null
 
-  @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe to, in VCF format.", index = 1)
+  @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe variant contexts to (e.g. .vcf, .vcf.gz, .vcf.bgz). If extension is not detected, Parquet is assumed.", index = 1)
   var outputPath: String = null
 
   @Args4jOption(required = false, name = "-single", usage = "Saves OUTPUT as single file.")
@@ -72,8 +73,19 @@ class VtNormalize(protected val args: VtNormalizeArgs) extends BDGSparkCommand[V
   val stringency: ValidationStringency = ValidationStringency.valueOf(args.stringency)
 
   def run(sc: SparkContext) {
-    val variantContexts = sc.loadVcf(args.inputPath, stringency = stringency)
+    val variantContexts = sc.loadVariantContexts(args.inputPath)
     val pipedVariantContexts = new VtNormalizeFn(args, stringency, sc).apply(variantContexts)
-    pipedVariantContexts.saveAsVcf(args, stringency)
+
+    if (isVcfExt(args.outputPath)) {
+      pipedVariantContexts.saveAsVcf(
+        args.outputPath,
+        asSingleFile = args.asSingleFile,
+        deferMerging = args.deferMerging,
+        disableFastConcat = args.disableFastConcat,
+        stringency
+      )
+    } else {
+      pipedVariantContexts.saveAsParquet(args)
+    }
   }
 }

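One detail worth noting across the run methods above: the single-file flags (asSingleFile, deferMerging, disableFastConcat) are consumed only on the VCF branch; saveAsParquet(args) ignores them and writes a directory of part files. A sketch of the VCF branch with the flags spelled out, using hypothetical values and an illustrative path:

pipedVariantContexts.saveAsVcf(
  "out.vcf.bgz",
  asSingleFile = true,       // merge part files into a single .vcf.bgz
  deferMerging = false,      // merge now instead of leaving shards behind
  disableFastConcat = true,  // use the slower but more portable merge path
  ValidationStringency.LENIENT)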