Skip to content

Commit

Permalink
HaplotypeCaller should emit gVCF. We'll then use GenotypeGVCFs to gen…
Browse files Browse the repository at this point in the history
…otype prior to VQSR, and will recalculate some missing annotations.
  • Loading branch information
fnothaft committed Feb 25, 2016
1 parent 796deb5 commit 52b401e
Showing 1 changed file with 60 additions and 7 deletions.
67 changes: 60 additions & 7 deletions src/toil_scripts/gatk_germline/germline.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,8 +314,8 @@ def index(job, shared_ids, input_args):

def haplotype_caller(job, shared_ids, input_args):
"""
Uses GATK HaplotypeCaller to identify SNPs and Indels.
Calls variant quality score recalibration functions.
Uses GATK HaplotypeCaller to identify SNPs and Indels and writes a gVCF.
Calls per-sample genotyper to genotype gVCF.
:param job: Job instance
:param shared_ids: dictionary of shared file promises
Expand All @@ -324,29 +324,82 @@ def haplotype_caller(job, shared_ids, input_args):
work_dir = job.fileStore.getLocalTempDir()
input_files = ['ref.fa', 'ref.fa.fai', 'ref.dict', 'toil.bam', 'toil.bam.bai']
read_from_filestore_hc(job, work_dir, shared_ids, *input_files)
output = 'unified.raw.BOTH.gatk.vcf'
output = '%s.raw.BOTH%s.gvcf' % (input_args['uuid'],
input_args['suffix'])

# Call GATK -- HaplotypeCaller
command = ['-nct', input_args['cpu_count'],
'-R', 'ref.fa',
'-T', 'HaplotypeCaller',
'--genotyping_mode', 'Discovery',
'--output_mode', 'EMIT_VARIANTS_ONLY',
'--emitRefConfidence', 'GVCF',
'-I', 'toil.bam',
'-o', 'unified.raw.BOTH.gatk.vcf',
'-o', output,
'-variant_index_type', 'LINEAR',
'-variant_index_parameter', '128000',
'--annotation', 'QualByDepth',
'--annotation', 'DepthPerSampleHC',
'--annotation', 'FisherStrand',
'--annotation', 'ReadPosRankSumTest']
try:
docker_call(work_dir = work_dir,
tool_parameters = command,
tool = 'quay.io/ucsc_cgl/gatk',
sudo = input_args['sudo'])
except:
sys.stderr.write("Running haplotype caller with %s in %s failed." % (
" ".join(command), work_dir))
raise

# Update fileStore and spawn child job
shared_ids[output] = job.fileStore.writeGlobalFile(os.path.join(work_dir, output))

# upload gvcf
upload_or_move_hc(work_dir, input_args, output)

# call variants prior to vqsr
job.addChildJobFn(genotype_gvcf, shared_ids, input_args)


def genotype_gvcf(job, shared_ids, input_args):
"""
Genotypes the gVCF generated by the HaplotypeCaller.
Calls variant quality score recalibration functions.
:param job: Job instance
:param shared_ids: dictionary of shared file promises
:param input_args: dictionary of input arguments
"""

work_dir = job.fileStore.getLocalTempDir()
input_files = ['%s.raw.BOTH%s.gvcf' % (input_args['uuid'],
input_args['suffix']),
'ref.fa', 'ref.fa.fai', 'ref.dict']
read_from_filestore_hc(job, work_dir, shared_ids, *input_files)
output = 'unified.raw.BOTH.gatk.vcf'

command = ['-nt', input_args['cpu_count'],
'-R', 'ref.fa',
'-T', 'GenotypeGVCFs',
'--variant', '%s.raw.BOTH.gatk.gvcf' % input_args['uuid'],
'--out', output,
'-stand_emit_conf', '10.0',
'-stand_call_conf', '30.0']

try:
docker_call(work_dir = work_dir,
tool_parameters = command,
tool = 'quay.io/ucsc_cgl/gatk',
sudo = input_args['sudo'])
except:
sys.stderr.write("Running haplotype caller with %s in %s failed." % (
sys.stderr.write("Running GenotypeGVCFs with %s in %s failed." % (
" ".join(command), work_dir))
raise

# Update fileStore and spawn child job
shared_ids['unified.raw.BOTH.gatk.vcf'] = job.fileStore.writeGlobalFile(os.path.join(work_dir, output))
shared_ids[output] = job.fileStore.writeGlobalFile(os.path.join(work_dir, output))

# run vqsr
job.addChildJobFn(vqsr_snp, shared_ids, input_args)
job.addChildJobFn(vqsr_indel, shared_ids, input_args)

Expand Down

0 comments on commit 52b401e

Please sign in to comment.