Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TheiaProk TB: new VCF output and modification to the coverage report #245

Merged
merged 24 commits into main from smw-tb-2023-10-25-dev
Dec 29, 2023
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
afca582
output tbprofiler vcf
sage-wright Oct 17, 2023
0d030e2
update default docker
sage-wright Oct 17, 2023
0251379
fix path
sage-wright Oct 17, 2023
f520261
add sample id to the beginning of the coverage report
sage-wright Oct 25, 2023
ba8d60d
update default docker
sage-wright Oct 25, 2023
c6fac1e
Merge branch 'smw-tb-vcf-dev' into smw-tb-2023-10-25-dev
sage-wright Oct 25, 2023
efafe25
Enable TBProfiler parameter changes (#246)
frankambrosio3 Nov 13, 2023
23008ad
Merge branch 'main' into smw-tb-2023-10-25-dev
sage-wright Nov 13, 2023
a6fc36c
update md5sums
sage-wright Nov 15, 2023
fe5b8a2
caller_options tbprofiler
frankambrosio3 Nov 28, 2023
cd18ce1
caller_options merlin magic
frankambrosio3 Nov 28, 2023
2f2f2ea
--calling_params tbprofiler
frankambrosio3 Nov 28, 2023
e36494c
calling_params tbprofiler
frankambrosio3 Nov 29, 2023
742acaf
quotes around params tbprofiler
frankambrosio3 Nov 29, 2023
3235eb1
added quotes around calling params tbprofiler
frankambrosio3 Nov 29, 2023
af529a7
"-C 1 -F 0.0" tbprof
frankambrosio3 Nov 29, 2023
2dcd5e2
removed caller options
frankambrosio3 Nov 29, 2023
a8ab72a
hardcoded tbprofiler freebayes params
frankambrosio3 Nov 29, 2023
066d643
re-optionalize
sage-wright Nov 30, 2023
7c29fe8
update md5sums
sage-wright Nov 30, 2023
0085148
Add branch name to versioning task
sage-wright Dec 15, 2023
446b8d9
Merge branch 'main' into smw-tb-2023-10-25-dev
kevinlibuit Dec 29, 2023
2481b17
version reversion for merge
sage-wright Dec 29, 2023
fec38cf
update checksums
sage-wright Dec 29, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion tasks/species_typing/task_tbp_parser.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ task tbp_parser {
Int coverage_threshold = 100
Boolean tbp_parser_debug = false

String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.1.1"
String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.1.4"
Int disk_size = 100
Int memory = 4
Int cpu = 1
Expand Down Expand Up @@ -41,6 +41,9 @@ task tbp_parser {

# get genome average depth
samtools depth -J ~{tbprofiler_bam} | awk -F "\t" '{sum+=$3} END { print sum/NR }' | tee AVG_DEPTH

# add sample id to the beginning of the coverage report
awk '{print "~{samplename},"$0}' ~{samplename}.percent_gene_coverage.csv > tmp.csv && mv -f tmp.csv ~{samplename}.percent_gene_coverage.csv
>>>
output {
File tbp_parser_looker_report_csv = "~{samplename}.looker_report.csv"
Expand Down
19 changes: 11 additions & 8 deletions tasks/species_typing/task_tbprofiler.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ task tbprofiler {
String tbprofiler_docker_image = "us-docker.pkg.dev/general-theiagen/staphb/tbprofiler:4.4.2"
Int disk_size = 100
String mapper = "bwa"
String caller = "freebayes"
String variant_caller = "freebayes"
String? variant_calling_params
Int min_depth = 10
Float min_af = 0.1
Float min_af_pred = 0.1
Expand Down Expand Up @@ -42,7 +43,6 @@ task tbprofiler {

# check if new database file is provided and not empty
if [ "~{tbprofiler_run_custom_db}" = true ] ; then

echo "Found new database file ~{tbprofiler_custom_db}"
prefix=$(basename "~{tbprofiler_custom_db}" | sed 's/\.tar\.gz$//')
echo "New database will be created with prefix $prefix"
Expand All @@ -53,11 +53,8 @@ task tbprofiler {
tb-profiler load_library ./"$prefix"/"$prefix"

TBDB="--db $prefix"

else

TBDB=""

fi

# Run tb-profiler on the input reads with samplename prefix
Expand All @@ -66,11 +63,11 @@ task tbprofiler {
${INPUT_READS} \
--prefix ~{samplename} \
--mapper ~{mapper} \
--caller ~{caller} \
--caller ~{variant_caller} \
--calling_params "~{variant_calling_params}" \
--min_depth ~{min_depth} \
--af ~{min_af} \
--reporting_af \
~{min_af_pred} \
--reporting_af ~{min_af_pred} \
--coverage_fraction_threshold ~{cov_frac_threshold} \
--csv --txt \
$TBDB
Expand All @@ -81,6 +78,11 @@ task tbprofiler {
# touch optional output files because wdl
touch GENE_NAME LOCUS_TAG VARIANT_SUBSTITUTIONS OUTPUT_SEQ_METHOD_TYPE

# merge all vcf files if multiple are present
bcftools index ./vcf/*bcf
bcftools index ./vcf/*gz
bcftools merge --force-samples ./vcf/*bcf ./vcf/*gz > ./vcf/~{samplename}.targets.csq.merged.vcf

python3 <<CODE
import csv
import json
Expand Down Expand Up @@ -127,6 +129,7 @@ task tbprofiler {
File tbprofiler_output_json = "./results/~{samplename}.results.json"
File tbprofiler_output_bam = "./bam/~{samplename}.bam"
File tbprofiler_output_bai = "./bam/~{samplename}.bam.bai"
File tbprofiler_output_vcf = "./vcf/~{samplename}.targets.csq.merged.vcf"
String version = read_string("VERSION")
String tbprofiler_main_lineage = read_string("MAIN_LINEAGE")
String tbprofiler_sub_lineage = read_string("SUB_LINEAGE")
Expand Down
2 changes: 1 addition & 1 deletion tasks/task_versioning.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ task version_capture {
volatile: true
}
command {
PHB_Version="PHB v1.2.1"
PHB_Version="PHB v1.2.1: smw-tb-2023-10-25-dev branch"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The branch name appended to PHB_Version will need to be removed (reverting to the plain version string) before this is merged.

~{default='' 'export TZ=' + timezone}
date +"%Y-%m-%d" > TODAY
echo "$PHB_Version" > PHB_VERSION
Expand Down
6 changes: 3 additions & 3 deletions tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@
- path: miniwdl_run/wdl/tasks/species_typing/task_sonneityping.wdl
md5sum: aeb12cf9a0db4e34f2aecbaba51c30fb
- path: miniwdl_run/wdl/tasks/species_typing/task_tbprofiler.wdl
md5sum: c2a8c0978cc1bd65656584e1bad9dca7
md5sum: e486a508ffbfbf300ad64892d82ddde6
- path: miniwdl_run/wdl/tasks/species_typing/task_ts_mlst.wdl
md5sum: d49ae0b02e798af0636eb2721bb434b4
- path: miniwdl_run/wdl/tasks/task_versioning.wdl
Expand All @@ -634,9 +634,9 @@
- path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl
md5sum: 43ef050bde1fb8755f38e697a1794918
- path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl
md5sum: 0c87a7279c4870a821c3dc1db9a6a94b
md5sum: c5bfe3dd3c5138d0ab6002a3fb93a139
- path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl
md5sum: 00bd2489b2a7aa5b88340a940961a857
md5sum: ed9b8b3084027873fd92f1e76a649741
- path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_pe.wdl
contains: ["version", "QC", "output"]
- path: miniwdl_run/workflow.log
Expand Down
6 changes: 3 additions & 3 deletions tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,7 @@
- path: miniwdl_run/wdl/tasks/species_typing/task_sonneityping.wdl
md5sum: aeb12cf9a0db4e34f2aecbaba51c30fb
- path: miniwdl_run/wdl/tasks/species_typing/task_tbprofiler.wdl
md5sum: c2a8c0978cc1bd65656584e1bad9dca7
md5sum: e486a508ffbfbf300ad64892d82ddde6
- path: miniwdl_run/wdl/tasks/species_typing/task_ts_mlst.wdl
md5sum: d49ae0b02e798af0636eb2721bb434b4
- path: miniwdl_run/wdl/tasks/task_versioning.wdl
Expand All @@ -600,9 +600,9 @@
- path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl
md5sum: 43ef050bde1fb8755f38e697a1794918
- path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl
md5sum: e1d9e75dae5176ceeb95b88a5d3bbba7
md5sum: 61585a6028465c3f30f3022d55818211
- path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl
md5sum: 00bd2489b2a7aa5b88340a940961a857
md5sum: ed9b8b3084027873fd92f1e76a649741
- path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_se.wdl
md5sum: 53d322d895837c0bcb049786572e944d
- path: miniwdl_run/workflow.log
Expand Down
1 change: 1 addition & 0 deletions workflows/theiaprok/wf_theiaprok_illumina_pe.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -818,6 +818,7 @@ workflow theiaprok_illumina_pe {
File? tbprofiler_output_file = merlin_magic.tbprofiler_output_file
File? tbprofiler_output_bam = merlin_magic.tbprofiler_output_bam
File? tbprofiler_output_bai = merlin_magic.tbprofiler_output_bai
File? tbprofiler_output_vcf = merlin_magic.tbprofiler_output_vcf
String? tbprofiler_version = merlin_magic.tbprofiler_version
String? tbprofiler_main_lineage = merlin_magic.tbprofiler_main_lineage
String? tbprofiler_sub_lineage = merlin_magic.tbprofiler_sub_lineage
Expand Down
1 change: 1 addition & 0 deletions workflows/theiaprok/wf_theiaprok_illumina_se.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -770,6 +770,7 @@ workflow theiaprok_illumina_se {
File? tbprofiler_output_file = merlin_magic.tbprofiler_output_file
File? tbprofiler_output_bam = merlin_magic.tbprofiler_output_bam
File? tbprofiler_output_bai = merlin_magic.tbprofiler_output_bai
File? tbprofiler_output_vcf = merlin_magic.tbprofiler_output_vcf
String? tbprofiler_version = merlin_magic.tbprofiler_version
String? tbprofiler_main_lineage = merlin_magic.tbprofiler_main_lineage
String? tbprofiler_sub_lineage = merlin_magic.tbprofiler_sub_lineage
Expand Down
1 change: 1 addition & 0 deletions workflows/theiaprok/wf_theiaprok_ont.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,7 @@ workflow theiaprok_ont {
File? tbprofiler_output_file = merlin_magic.tbprofiler_output_file
File? tbprofiler_output_bam = merlin_magic.tbprofiler_output_bam
File? tbprofiler_output_bai = merlin_magic.tbprofiler_output_bai
File? tbprofiler_output_vcf = merlin_magic.tbprofiler_output_vcf
String? tbprofiler_version = merlin_magic.tbprofiler_version
String? tbprofiler_main_lineage = merlin_magic.tbprofiler_main_lineage
String? tbprofiler_sub_lineage = merlin_magic.tbprofiler_sub_lineage
Expand Down
17 changes: 16 additions & 1 deletion workflows/utilities/wf_merlin_magic.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,13 @@ workflow merlin_magic {
Boolean call_shigeifinder_reads_input = false
Boolean assembly_only = false
Boolean theiaeuk = false
String? tbprofiler_mapper
Int? tbprofiler_min_depth
Float? tbprofiler_min_af
Float? tbprofiler_min_af_pred
Int? tbprofiler_cov_frac_threshold
String? tbprofiler_variant_caller
String? tbprofiler_variant_calling_params
Boolean tbprofiler_run_custom_db = false
File tbprofiler_custom_db = "gs://theiagen-public-files/terra/theiaprok-files/tbdb_varpipe_combined_nodups.tar.gz"
Boolean tbprofiler_additional_outputs = false
Expand Down Expand Up @@ -252,7 +259,14 @@ workflow merlin_magic {
samplename = samplename,
tbprofiler_run_custom_db = tbprofiler_run_custom_db,
tbprofiler_custom_db = tbprofiler_custom_db,
ont_data = ont_data
ont_data = ont_data,
mapper = tbprofiler_mapper,
variant_caller = tbprofiler_variant_caller,
variant_calling_params = tbprofiler_variant_calling_params,
min_depth = tbprofiler_min_depth,
min_af = tbprofiler_min_af,
min_af_pred = tbprofiler_min_af_pred,
cov_frac_threshold = tbprofiler_cov_frac_threshold
}
if (tbprofiler_additional_outputs) {
call tbp_parser_task.tbp_parser {
Expand Down Expand Up @@ -580,6 +594,7 @@ workflow merlin_magic {
File? tbprofiler_output_file = tbprofiler.tbprofiler_output_csv
File? tbprofiler_output_bam = tbprofiler.tbprofiler_output_bam
File? tbprofiler_output_bai = tbprofiler.tbprofiler_output_bai
File? tbprofiler_output_vcf = tbprofiler.tbprofiler_output_vcf
String? tbprofiler_version = tbprofiler.version
String? tbprofiler_main_lineage = tbprofiler.tbprofiler_main_lineage
String? tbprofiler_sub_lineage = tbprofiler.tbprofiler_sub_lineage
Expand Down