From afca58297c6a27e4647698419441ae16d0be7e21 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Tue, 17 Oct 2023 18:38:23 +0000 Subject: [PATCH 01/21] output tbprofiler vcf --- tasks/species_typing/task_tbprofiler.wdl | 10 ++++++---- workflows/theiaprok/wf_theiaprok_illumina_pe.wdl | 1 + workflows/theiaprok/wf_theiaprok_illumina_se.wdl | 1 + workflows/theiaprok/wf_theiaprok_ont.wdl | 1 + workflows/utilities/wf_merlin_magic.wdl | 1 + 5 files changed, 10 insertions(+), 4 deletions(-) diff --git a/tasks/species_typing/task_tbprofiler.wdl b/tasks/species_typing/task_tbprofiler.wdl index bb8b4e21d..682126bc8 100644 --- a/tasks/species_typing/task_tbprofiler.wdl +++ b/tasks/species_typing/task_tbprofiler.wdl @@ -42,7 +42,6 @@ task tbprofiler { # check if new database file is provided and not empty if [ "~{tbprofiler_run_custom_db}" = true ] ; then - echo "Found new database file ~{tbprofiler_custom_db}" prefix=$(basename "~{tbprofiler_custom_db}" | sed 's/\.tar\.gz$//') echo "New database will be created with prefix $prefix" @@ -53,11 +52,8 @@ task tbprofiler { tb-profiler load_library ./"$prefix"/"$prefix" TBDB="--db $prefix" - else - TBDB="" - fi # Run tb-profiler on the input reads with samplename prefix @@ -81,6 +77,11 @@ task tbprofiler { # touch optional output files because wdl touch GENE_NAME LOCUS_TAG VARIANT_SUBSTITUTIONS OUTPUT_SEQ_METHOD_TYPE + # merge all vcf files if multiple are present + bcftools convert -O b ./vcf/~{samplename}.targets.csq.vcf > ./vcf/~{samplename}.targets.csq.bcf + bcftools index *bcf + bcftools merge --force-samples *bcf > ./vcf/~{samplename}.targets.csq.merged.vcf + python3 < Date: Tue, 17 Oct 2023 18:59:40 +0000 Subject: [PATCH 02/21] update default docker --- tasks/species_typing/task_tbp_parser.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/species_typing/task_tbp_parser.wdl b/tasks/species_typing/task_tbp_parser.wdl index 511cefd0f..007320bfd 100644 --- a/tasks/species_typing/task_tbp_parser.wdl +++ b/tasks/species_typing/task_tbp_parser.wdl @@ -13,7 +13,7 @@ task tbp_parser { Int coverage_threshold = 100 Boolean tbp_parser_debug = false - String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.1.1" + String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.1.3" Int disk_size = 100 Int memory = 4 Int cpu = 1 From 0251379d952daff2772387704cc1c39759edffea Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Tue, 17 Oct 2023 19:44:02 +0000 Subject: [PATCH 03/21] fix path --- tasks/species_typing/task_tbprofiler.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tasks/species_typing/task_tbprofiler.wdl b/tasks/species_typing/task_tbprofiler.wdl index 682126bc8..16a553e8f 100644 --- a/tasks/species_typing/task_tbprofiler.wdl +++ b/tasks/species_typing/task_tbprofiler.wdl @@ -78,9 +78,9 @@ task tbprofiler { touch GENE_NAME LOCUS_TAG VARIANT_SUBSTITUTIONS OUTPUT_SEQ_METHOD_TYPE # merge all vcf files if multiple are present - bcftools convert -O b ./vcf/~{samplename}.targets.csq.vcf > ./vcf/~{samplename}.targets.csq.bcf - bcftools index *bcf - bcftools merge --force-samples *bcf > ./vcf/~{samplename}.targets.csq.merged.vcf + bcftools index ./vcf/*bcf + bcftools index ./vcf/*gz + bcftools merge --force-samples ./vcf/*bcf ./vcf/*gz > ./vcf/~{samplename}.targets.csq.merged.vcf python3 < Date: Wed, 25 Oct 2023 16:28:26 +0000 Subject: [PATCH 04/21] add sample id to the beginning of the coverage report --- tasks/species_typing/task_tbp_parser.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tasks/species_typing/task_tbp_parser.wdl b/tasks/species_typing/task_tbp_parser.wdl index 511cefd0f..b6685ce18 100644 --- a/tasks/species_typing/task_tbp_parser.wdl +++ b/tasks/species_typing/task_tbp_parser.wdl @@ -41,6 +41,9 @@ task tbp_parser { # get genome average depth samtools depth -J ~{tbprofiler_bam} | awk -F "\t" '{sum+=$3} END { print sum/NR }' | tee AVG_DEPTH + + # add sample id to the beginning of the coverage report + awk '{print "~{samplename},"$0}' ~{samplename}.percent_gene_coverage.csv > tmp.csv && mv -f tmp.csv ~{samplename}.percent_gene_coverage.csv >>> output { File tbp_parser_looker_report_csv = "~{samplename}.looker_report.csv" From ba8d60ddbd0e4ff98987545facc42ed6b754bec6 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Wed, 25 Oct 2023 16:28:41 +0000 Subject: [PATCH 05/21] update default docker --- tasks/species_typing/task_tbp_parser.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/species_typing/task_tbp_parser.wdl b/tasks/species_typing/task_tbp_parser.wdl index b6685ce18..c18bbd098 100644 --- a/tasks/species_typing/task_tbp_parser.wdl +++ b/tasks/species_typing/task_tbp_parser.wdl @@ -13,7 +13,7 @@ task tbp_parser { Int coverage_threshold = 100 Boolean tbp_parser_debug = false - String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.1.1" + String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.1.4" Int disk_size = 100 Int memory = 4 Int cpu = 1 From efafe2591af3991da437909a9311b60d21264a75 Mon Sep 17 00:00:00 2001 From: frankambrosio3 <64995676+frankambrosio3@users.noreply.github.com> Date: Mon, 13 Nov 2023 15:37:24 -0500 Subject: [PATCH 06/21] Enable TBProfiler parameter changes (#246) * updated VCF output file renaming in kSNP3 task (#207) * updated VCF output file renaming in kSNP3 task; also added 1 new File output and change the output names to be more descriptive * ksnp3 task:changed VCF file names to be predictable; split 2 ksnp3 options to 2 lines for readability; added new string output "ksnp3_vcf_ref_samplename" to capture sample within cluster to use for snp calling * added new string output to ksnp3 workflow "ksnp3_vcf_ref_samplename" * reduce unnecessary logging in MIDAS task (#210) * made untar/decompression of midas database quiet since it produces 41k lines of output. also made the 2 mv commands verbose (but it's only 2 lines!) * update CI * expose tbprofiler parameters as inputs in merlin * input spelling --------- Co-authored-by: Curtis Kapsak --- tasks/phylogenetic_inference/task_ksnp3.wdl | 19 ++++++++++++++----- tasks/taxon_id/task_midas.wdl | 7 ++++--- .../test_wf_theiaprok_illumina_pe.yml | 2 +- .../test_wf_theiaprok_illumina_se.yml | 2 +- workflows/phylogenetics/wf_ksnp3.wdl | 4 +++- workflows/utilities/wf_merlin_magic.wdl | 14 +++++++++++++- 6 files changed, 36 insertions(+), 12 deletions(-) diff --git a/tasks/phylogenetic_inference/task_ksnp3.wdl b/tasks/phylogenetic_inference/task_ksnp3.wdl index c24844f4f..2ae354ef2 100644 --- a/tasks/phylogenetic_inference/task_ksnp3.wdl +++ b/tasks/phylogenetic_inference/task_ksnp3.wdl @@ -53,7 +53,8 @@ task ksnp3 { -in ksnp3_input.tsv \ -outdir ksnp3 \ -k ~{kmer_size} \ - -core -vcf \ + -core \ + -vcf \ ~{'-SNPs_all ' + previous_ksnp3_snps} \ ~{ksnp3_args} @@ -71,7 +72,13 @@ task ksnp3 { echo "The core SNP matrix could not be produced" | tee SKIP_SNP_DIST # otherwise, skip fi - mv -v ksnp3/VCF.*.vcf ksnp3/~{cluster_name}_core.vcf + # capture sample name of genome used as reference + ls ksnp3/*.vcf | cut -d '.' -f 2 | tee KSNP3_VCF_REF_SAMPLENAME.txt + + # rename the 2 vcf files by appending ~{cluster_name} and removing the ref genome name to make final filenames predictable + mv -v ksnp3/VCF.*.vcf ksnp3/~{cluster_name}_VCF.reference_genome.vcf + mv -v ksnp3/VCF.SNPsNotinRef.* ksnp3/~{cluster_name}_VCF_.SNPsNotinRef.tsv + mv -v ksnp3/SNPs_all_matrix.fasta ksnp3/~{cluster_name}_pan_SNPs_matrix.fasta mv -v ksnp3/tree.parsimony.tre ksnp3/~{cluster_name}_pan_parsimony.nwk @@ -84,9 +91,11 @@ task ksnp3 { >>> output { - File ksnp3_core_matrix = "ksnp3/${cluster_name}_core_SNPs_matrix.fasta" - File ksnp3_core_tree = "ksnp3/${cluster_name}_core.nwk" - File ksnp3_core_vcf = "ksnp3/${cluster_name}_core.vcf" + File ksnp3_core_matrix = "ksnp3/~{cluster_name}_core_SNPs_matrix.fasta" + File ksnp3_core_tree = "ksnp3/~{cluster_name}_core.nwk" + File ksnp3_vcf_ref_genome = "ksnp3/~{cluster_name}_VCF.reference_genome.vcf" + File ksnp3_vcf_snps_not_in_ref = "ksnp3/~{cluster_name}_VCF_.SNPsNotinRef.tsv" + String ksnp3_vcf_ref_samplename = read_string("KSNP3_VCF_REF_SAMPLENAME.txt") File ksnp3_pan_matrix = "ksnp3/~{cluster_name}_pan_SNPs_matrix.fasta" File ksnp3_pan_parsimony_tree = "ksnp3/~{cluster_name}_pan_parsimony.nwk" File? ksnp3_ml_tree = "ksnp3/~{cluster_name}_ML.nwk" diff --git a/tasks/taxon_id/task_midas.wdl b/tasks/taxon_id/task_midas.wdl index a5761a5cb..6d3827370 100644 --- a/tasks/taxon_id/task_midas.wdl +++ b/tasks/taxon_id/task_midas.wdl @@ -16,14 +16,15 @@ task midas { # Decompress the Midas database mkdir db - tar -C ./db/ -xzvf ~{midas_db} + echo "Decompressing Midas database. Please be patient, this may take a few minutes." + tar -C ./db/ -xzf ~{midas_db} # Run Midas run_midas.py species ~{samplename} -1 ~{read1} ~{'-2 ' + read2} -d db/midas_db_v1.2/ -t ~{cpu} # rename output files - mv ~{samplename}/species/species_profile.txt ~{samplename}/species/~{samplename}_species_profile.tsv - mv ~{samplename}/species/log.txt ~{samplename}/species/~{samplename}_log.txt + mv -v ~{samplename}/species/species_profile.txt ~{samplename}/species/~{samplename}_species_profile.tsv + mv -v ~{samplename}/species/log.txt ~{samplename}/species/~{samplename}_log.txt # Run a python block to parse output file for terra data tables # pandas is available in default docker image for python2 but not python3 diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index a60d9641e..56731c094 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -630,7 +630,7 @@ - path: miniwdl_run/wdl/tasks/taxon_id/task_kraken2.wdl md5sum: a1f287e6e6feaf2d7d3c74a70e3b5a28 - path: miniwdl_run/wdl/tasks/taxon_id/task_midas.wdl - md5sum: 024971d1439dff7d59c0a26a824bd2c6 + md5sum: faacd87946ee3fbdf70f3a15b79ce547 - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl md5sum: 43ef050bde1fb8755f38e697a1794918 - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index 034238008..f4671cf72 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -598,7 +598,7 @@ - path: miniwdl_run/wdl/tasks/taxon_id/task_kraken2.wdl md5sum: a1f287e6e6feaf2d7d3c74a70e3b5a28 - path: miniwdl_run/wdl/tasks/taxon_id/task_midas.wdl - md5sum: 024971d1439dff7d59c0a26a824bd2c6 + md5sum: faacd87946ee3fbdf70f3a15b79ce547 - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl md5sum: 43ef050bde1fb8755f38e697a1794918 - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl diff --git a/workflows/phylogenetics/wf_ksnp3.wdl b/workflows/phylogenetics/wf_ksnp3.wdl index f86d6fb00..3b8c064f8 100644 --- a/workflows/phylogenetics/wf_ksnp3.wdl +++ b/workflows/phylogenetics/wf_ksnp3.wdl @@ -67,7 +67,9 @@ workflow ksnp3_workflow { String ksnp3_docker = ksnp3_task.ksnp3_docker_image # ksnp3_outputs String ksnp3_snp_dists_version = pan_snp_dists.snp_dists_version - File ksnp3_core_vcf = ksnp3_task.ksnp3_core_vcf + File ksnp3_vcf_ref_genome = ksnp3_task.ksnp3_vcf_ref_genome + File ksnp3_vcf_snps_not_in_ref = ksnp3_task.ksnp3_vcf_snps_not_in_ref + String ksnp3_vcf_ref_samplename = ksnp3_task.ksnp3_vcf_ref_samplename String ksnp3_core_snp_matrix_status = ksnp3_task.skip_core_snp_dists File ksnp3_snps = ksnp3_task.ksnp3_snps_all # ordered matrixes and reordered trees diff --git a/workflows/utilities/wf_merlin_magic.wdl b/workflows/utilities/wf_merlin_magic.wdl index 98dd4a4dc..6a23c8127 100644 --- a/workflows/utilities/wf_merlin_magic.wdl +++ b/workflows/utilities/wf_merlin_magic.wdl @@ -62,6 +62,12 @@ workflow merlin_magic { Boolean call_shigeifinder_reads_input = false Boolean assembly_only = false Boolean theiaeuk = false + String? tbp_mapper + String? tbp_caller + Int? tbp_min_depth + Float? tbp_min_af + Float? tbp_min_af_pred + Int? tbp_cov_frac_threshold Boolean tbprofiler_run_custom_db = false File tbprofiler_custom_db = "gs://theiagen-public-files/terra/theiaprok-files/tbdb_varpipe_combined_nodups.tar.gz" Boolean tbprofiler_additional_outputs = false @@ -252,7 +258,13 @@ workflow merlin_magic { samplename = samplename, tbprofiler_run_custom_db = tbprofiler_run_custom_db, tbprofiler_custom_db = tbprofiler_custom_db, - ont_data = ont_data + ont_data = ont_data, + mapper = tbp_mapper, + caller = tbp_caller, + min_depth = tbp_min_depth, + min_af = tbp_min_af, + min_af_pred = tbp_min_af_pred, + cov_frac_threshold = tbp_cov_frac_threshold } if (tbprofiler_additional_outputs) { call tbp_parser_task.tbp_parser { From a6fc36c75fcdd3d028278606e86a72df12320250 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Wed, 15 Nov 2023 15:42:47 +0000 Subject: [PATCH 07/21] update md5sums --- tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml | 6 +++--- tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index 5ecac975e..f2c49145f 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -620,7 +620,7 @@ - path: miniwdl_run/wdl/tasks/species_typing/task_sonneityping.wdl md5sum: aeb12cf9a0db4e34f2aecbaba51c30fb - path: miniwdl_run/wdl/tasks/species_typing/task_tbprofiler.wdl - md5sum: c2a8c0978cc1bd65656584e1bad9dca7 + md5sum: 0cc3554c6af825e450398f5cdcec807e - path: miniwdl_run/wdl/tasks/species_typing/task_ts_mlst.wdl md5sum: d49ae0b02e798af0636eb2721bb434b4 - path: miniwdl_run/wdl/tasks/task_versioning.wdl @@ -634,9 +634,9 @@ - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl md5sum: 43ef050bde1fb8755f38e697a1794918 - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl - md5sum: 0c87a7279c4870a821c3dc1db9a6a94b + md5sum: c5bfe3dd3c5138d0ab6002a3fb93a139 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl - md5sum: 00bd2489b2a7aa5b88340a940961a857 + md5sum: ed9b8b3084027873fd92f1e76a649741 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_pe.wdl contains: ["version", "QC", "output"] - path: miniwdl_run/workflow.log diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index 7bb296e92..4c84265e2 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -586,7 +586,7 @@ - path: miniwdl_run/wdl/tasks/species_typing/task_sonneityping.wdl md5sum: aeb12cf9a0db4e34f2aecbaba51c30fb - path: miniwdl_run/wdl/tasks/species_typing/task_tbprofiler.wdl - md5sum: c2a8c0978cc1bd65656584e1bad9dca7 + md5sum: 0cc3554c6af825e450398f5cdcec807e - path: miniwdl_run/wdl/tasks/species_typing/task_ts_mlst.wdl md5sum: d49ae0b02e798af0636eb2721bb434b4 - path: miniwdl_run/wdl/tasks/task_versioning.wdl @@ -600,9 +600,9 @@ - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl md5sum: 43ef050bde1fb8755f38e697a1794918 - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl - md5sum: e1d9e75dae5176ceeb95b88a5d3bbba7 + md5sum: 61585a6028465c3f30f3022d55818211 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl - md5sum: 00bd2489b2a7aa5b88340a940961a857 + md5sum: ed9b8b3084027873fd92f1e76a649741 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_se.wdl md5sum: 53d322d895837c0bcb049786572e944d - path: miniwdl_run/workflow.log From fe5b8a295fe7437147b258864dea7282c835714e Mon Sep 17 00:00:00 2001 From: frankambrosio3 Date: Tue, 28 Nov 2023 20:42:32 +0000 Subject: [PATCH 08/21] caller_options tbprofiler --- tasks/species_typing/task_tbprofiler.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tasks/species_typing/task_tbprofiler.wdl b/tasks/species_typing/task_tbprofiler.wdl index 16a553e8f..bf55e1d5b 100644 --- a/tasks/species_typing/task_tbprofiler.wdl +++ b/tasks/species_typing/task_tbprofiler.wdl @@ -10,6 +10,7 @@ task tbprofiler { Int disk_size = 100 String mapper = "bwa" String caller = "freebayes" + String caller_options = "" Int min_depth = 10 Float min_af = 0.1 Float min_af_pred = 0.1 @@ -63,10 +64,10 @@ task tbprofiler { --prefix ~{samplename} \ --mapper ~{mapper} \ --caller ~{caller} \ + --caller_options ~{caller_options} \ --min_depth ~{min_depth} \ --af ~{min_af} \ - --reporting_af \ - ~{min_af_pred} \ + --reporting_af ~{min_af_pred} \ --coverage_fraction_threshold ~{cov_frac_threshold} \ --csv --txt \ $TBDB From cd18ce18f2876fddd768bbba2cb46155c994dec8 Mon Sep 17 00:00:00 2001 From: frankambrosio3 Date: Tue, 28 Nov 2023 20:50:49 +0000 Subject: [PATCH 09/21] caller_options merlin magic --- workflows/utilities/wf_merlin_magic.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/workflows/utilities/wf_merlin_magic.wdl b/workflows/utilities/wf_merlin_magic.wdl index 6a23c8127..681d31583 100644 --- a/workflows/utilities/wf_merlin_magic.wdl +++ b/workflows/utilities/wf_merlin_magic.wdl @@ -64,6 +64,7 @@ workflow merlin_magic { Boolean theiaeuk = false String? tbp_mapper String? tbp_caller + String? tbp_caller_options Int? tbp_min_depth Float? tbp_min_af Float? tbp_min_af_pred @@ -261,6 +262,7 @@ workflow merlin_magic { ont_data = ont_data, mapper = tbp_mapper, caller = tbp_caller, + caller_options = tbp_caller_options, min_depth = tbp_min_depth, min_af = tbp_min_af, min_af_pred = tbp_min_af_pred, From 2f2f2ea31552af6e4c50632dde13c1f777339afc Mon Sep 17 00:00:00 2001 From: frankambrosio3 Date: Tue, 28 Nov 2023 21:01:07 +0000 Subject: [PATCH 10/21] --calling_params tbprofiler --- tasks/species_typing/task_tbprofiler.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/species_typing/task_tbprofiler.wdl b/tasks/species_typing/task_tbprofiler.wdl index bf55e1d5b..8476d5184 100644 --- a/tasks/species_typing/task_tbprofiler.wdl +++ b/tasks/species_typing/task_tbprofiler.wdl @@ -64,7 +64,7 @@ task tbprofiler { --prefix ~{samplename} \ --mapper ~{mapper} \ --caller ~{caller} \ - --caller_options ~{caller_options} \ + --calling_params ~{caller_options} \ --min_depth ~{min_depth} \ --af ~{min_af} \ --reporting_af ~{min_af_pred} \ From e36494c6e31f5726c9348916abf38173bcbe25f7 Mon Sep 17 00:00:00 2001 From: frankambrosio3 Date: Wed, 29 Nov 2023 00:31:32 +0000 Subject: [PATCH 11/21] calling_params tbprofiler --- tasks/species_typing/task_tbprofiler.wdl | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tasks/species_typing/task_tbprofiler.wdl b/tasks/species_typing/task_tbprofiler.wdl index 8476d5184..5b5baf16c 100644 --- a/tasks/species_typing/task_tbprofiler.wdl +++ b/tasks/species_typing/task_tbprofiler.wdl @@ -57,6 +57,14 @@ task tbprofiler { TBDB="" fi + # Print and save input parameters + if [ "~{caller_options}" = true ] ; then + echo "caller options: ~{caller_options}" + CALLER_PARAMS="~{caller_options}" + else + CALLER_PARAMS="" + fi + # Run tb-profiler on the input reads with samplename prefix tb-profiler profile \ ${mode} \ @@ -64,7 +72,7 @@ task tbprofiler { --prefix ~{samplename} \ --mapper ~{mapper} \ --caller ~{caller} \ - --calling_params ~{caller_options} \ + --calling_params ${CALLER_PARAMS} \ --min_depth ~{min_depth} \ --af ~{min_af} \ --reporting_af ~{min_af_pred} \ From 742acaf6d083874a9959ea50c4f3cb4b7ecfbe55 Mon Sep 17 00:00:00 2001 From: frankambrosio3 Date: Wed, 29 Nov 2023 00:35:59 +0000 Subject: [PATCH 12/21] quotes around params tbprofiler --- tasks/species_typing/task_tbprofiler.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/species_typing/task_tbprofiler.wdl b/tasks/species_typing/task_tbprofiler.wdl index 5b5baf16c..c8d0f9021 100644 --- a/tasks/species_typing/task_tbprofiler.wdl +++ b/tasks/species_typing/task_tbprofiler.wdl @@ -60,7 +60,7 @@ task tbprofiler { # Print and save input parameters if [ "~{caller_options}" = true ] ; then echo "caller options: ~{caller_options}" - CALLER_PARAMS="~{caller_options}" + CALLER_PARAMS='"~{caller_options}"' else CALLER_PARAMS="" fi From 3235eb18924e68f129ecfebfd2c7b04e7a0e57d6 Mon Sep 17 00:00:00 2001 From: frankambrosio3 Date: Wed, 29 Nov 2023 19:31:04 +0000 Subject: [PATCH 13/21] added quotes around calling params tbprofiler --- tasks/species_typing/task_tbprofiler.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/species_typing/task_tbprofiler.wdl b/tasks/species_typing/task_tbprofiler.wdl index c8d0f9021..3e09c688e 100644 --- a/tasks/species_typing/task_tbprofiler.wdl +++ b/tasks/species_typing/task_tbprofiler.wdl @@ -72,7 +72,7 @@ task tbprofiler { --prefix ~{samplename} \ --mapper ~{mapper} \ --caller ~{caller} \ - --calling_params ${CALLER_PARAMS} \ + --calling_params "${CALLER_PARAMS}" \ --min_depth ~{min_depth} \ --af ~{min_af} \ --reporting_af ~{min_af_pred} \ From af529a7afbe6e8f32cf157235abab64d6b461ea5 Mon Sep 17 00:00:00 2001 From: frankambrosio3 Date: Wed, 29 Nov 2023 20:10:48 +0000 Subject: [PATCH 14/21] "-C 1 -F 0.0" tbprof --- tasks/species_typing/task_tbprofiler.wdl | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tasks/species_typing/task_tbprofiler.wdl b/tasks/species_typing/task_tbprofiler.wdl index 3e09c688e..28d71571b 100644 --- a/tasks/species_typing/task_tbprofiler.wdl +++ b/tasks/species_typing/task_tbprofiler.wdl @@ -10,7 +10,7 @@ task tbprofiler { Int disk_size = 100 String mapper = "bwa" String caller = "freebayes" - String caller_options = "" + # String caller_options = "" Int min_depth = 10 Float min_af = 0.1 Float min_af_pred = 0.1 @@ -57,13 +57,13 @@ task tbprofiler { TBDB="" fi - # Print and save input parameters - if [ "~{caller_options}" = true ] ; then - echo "caller options: ~{caller_options}" - CALLER_PARAMS='"~{caller_options}"' - else - CALLER_PARAMS="" - fi + # # Print and save input parameters + # if [ "~{caller_options}" = true ] ; then + # echo "caller options: ~{caller_options}" + # CALLER_PARAMS='"~{caller_options}"' + # else + # CALLER_PARAMS="" + # fi # Run tb-profiler on the input reads with samplename prefix tb-profiler profile \ @@ -72,7 +72,8 @@ task tbprofiler { --prefix ~{samplename} \ --mapper ~{mapper} \ --caller ~{caller} \ - --calling_params "${CALLER_PARAMS}" \ + # --calling_params "${CALLER_PARAMS}" \ + --calling_params "-C 1 -F 0.0" --min_depth ~{min_depth} \ --af ~{min_af} \ --reporting_af ~{min_af_pred} \ From 2dcd5e2c3fa3b595f4c08ca807288bc415ac39f1 Mon Sep 17 00:00:00 2001 From: frankambrosio3 Date: Wed, 29 Nov 2023 20:12:37 +0000 Subject: [PATCH 15/21] removed caller options --- tasks/species_typing/task_tbprofiler.wdl | 10 ---------- workflows/utilities/wf_merlin_magic.wdl | 2 -- 2 files changed, 12 deletions(-) diff --git a/tasks/species_typing/task_tbprofiler.wdl b/tasks/species_typing/task_tbprofiler.wdl index 28d71571b..c3c0e7f13 100644 --- a/tasks/species_typing/task_tbprofiler.wdl +++ b/tasks/species_typing/task_tbprofiler.wdl @@ -10,7 +10,6 @@ task tbprofiler { Int disk_size = 100 String mapper = "bwa" String caller = "freebayes" - # String caller_options = "" Int min_depth = 10 Float min_af = 0.1 Float min_af_pred = 0.1 @@ -57,14 +56,6 @@ task tbprofiler { TBDB="" fi - # # Print and save input parameters - # if [ "~{caller_options}" = true ] ; then - # echo "caller options: ~{caller_options}" - # CALLER_PARAMS='"~{caller_options}"' - # else - # CALLER_PARAMS="" - # fi - # Run tb-profiler on the input reads with samplename prefix tb-profiler profile \ ${mode} \ @@ -72,7 +63,6 @@ task tbprofiler { --prefix ~{samplename} \ --mapper ~{mapper} \ --caller ~{caller} \ - # --calling_params "${CALLER_PARAMS}" \ --calling_params "-C 1 -F 0.0" --min_depth ~{min_depth} \ --af ~{min_af} \ diff --git a/workflows/utilities/wf_merlin_magic.wdl b/workflows/utilities/wf_merlin_magic.wdl index 681d31583..6a23c8127 100644 --- a/workflows/utilities/wf_merlin_magic.wdl +++ b/workflows/utilities/wf_merlin_magic.wdl @@ -64,7 +64,6 @@ workflow merlin_magic { Boolean theiaeuk = false String? tbp_mapper String? tbp_caller - String? tbp_caller_options Int? tbp_min_depth Float? tbp_min_af Float? tbp_min_af_pred @@ -262,7 +261,6 @@ workflow merlin_magic { ont_data = ont_data, mapper = tbp_mapper, caller = tbp_caller, - caller_options = tbp_caller_options, min_depth = tbp_min_depth, min_af = tbp_min_af, min_af_pred = tbp_min_af_pred, From a8ab72a774d825f4914e21187bddd29543003d55 Mon Sep 17 00:00:00 2001 From: frankambrosio3 Date: Wed, 29 Nov 2023 20:50:22 +0000 Subject: [PATCH 16/21] hardcoded tbprofiler freebayes params --- tasks/species_typing/task_tbprofiler.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/species_typing/task_tbprofiler.wdl b/tasks/species_typing/task_tbprofiler.wdl index c3c0e7f13..f8f1307dc 100644 --- a/tasks/species_typing/task_tbprofiler.wdl +++ b/tasks/species_typing/task_tbprofiler.wdl @@ -63,7 +63,7 @@ task tbprofiler { --prefix ~{samplename} \ --mapper ~{mapper} \ --caller ~{caller} \ - --calling_params "-C 1 -F 0.0" + --calling_params "-C 1 -F 0.0" \ --min_depth ~{min_depth} \ --af ~{min_af} \ --reporting_af ~{min_af_pred} \ From 066d6438420a4a82063f7812cd8f0241157c5e48 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Thu, 30 Nov 2023 18:26:11 +0000 Subject: [PATCH 17/21] re-optionalize --- tasks/species_typing/task_tbprofiler.wdl | 7 ++++--- workflows/utilities/wf_merlin_magic.wdl | 26 +++++++++++++----------- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/tasks/species_typing/task_tbprofiler.wdl b/tasks/species_typing/task_tbprofiler.wdl index f8f1307dc..5e56db8e9 100644 --- a/tasks/species_typing/task_tbprofiler.wdl +++ b/tasks/species_typing/task_tbprofiler.wdl @@ -9,7 +9,8 @@ task tbprofiler { String tbprofiler_docker_image = "us-docker.pkg.dev/general-theiagen/staphb/tbprofiler:4.4.2" Int disk_size = 100 String mapper = "bwa" - String caller = "freebayes" + String variant_caller = "freebayes" + String? variant_calling_params Int min_depth = 10 Float min_af = 0.1 Float min_af_pred = 0.1 @@ -62,8 +63,8 @@ task tbprofiler { ${INPUT_READS} \ --prefix ~{samplename} \ --mapper ~{mapper} \ - --caller ~{caller} \ - --calling_params "-C 1 -F 0.0" \ + --caller ~{variant_caller} \ + --calling_params "~{variant_calling_params}" \ --min_depth ~{min_depth} \ --af ~{min_af} \ --reporting_af ~{min_af_pred} \ diff --git a/workflows/utilities/wf_merlin_magic.wdl b/workflows/utilities/wf_merlin_magic.wdl index 6a23c8127..1b820d26d 100644 --- a/workflows/utilities/wf_merlin_magic.wdl +++ b/workflows/utilities/wf_merlin_magic.wdl @@ -62,12 +62,13 @@ workflow merlin_magic { Boolean call_shigeifinder_reads_input = false Boolean assembly_only = false Boolean theiaeuk = false - String? tbp_mapper - String? tbp_caller - Int? tbp_min_depth - Float? tbp_min_af - Float? tbp_min_af_pred - Int? tbp_cov_frac_threshold + String? tbprofiler_mapper + Int? tbprofiler_min_depth + Float? tbprofiler_min_af + Float? tbprofiler_min_af_pred + Int? tbprofiler_cov_frac_threshold + String? tbprofiler_variant_caller + String? tbprofiler_variant_calling_params Boolean tbprofiler_run_custom_db = false File tbprofiler_custom_db = "gs://theiagen-public-files/terra/theiaprok-files/tbdb_varpipe_combined_nodups.tar.gz" Boolean tbprofiler_additional_outputs = false @@ -259,12 +260,13 @@ workflow merlin_magic { tbprofiler_run_custom_db = tbprofiler_run_custom_db, tbprofiler_custom_db = tbprofiler_custom_db, ont_data = ont_data, - mapper = tbp_mapper, - caller = tbp_caller, - min_depth = tbp_min_depth, - min_af = tbp_min_af, - min_af_pred = tbp_min_af_pred, - cov_frac_threshold = tbp_cov_frac_threshold + mapper = tbprofiler_mapper, + variant_caller = tbprofiler_variant_caller, + variant_calling_params = tbprofiler_variant_calling_params, + min_depth = tbprofiler_min_depth, + min_af = tbprofiler_min_af, + min_af_pred = tbprofiler_min_af_pred, + cov_frac_threshold = tbprofiler_cov_frac_threshold } if (tbprofiler_additional_outputs) { call tbp_parser_task.tbp_parser { From 7c29fe8aa5377f59a6478bce498e774e8ea1af6f Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Thu, 30 Nov 2023 18:35:14 +0000 Subject: [PATCH 18/21] update md5sums --- tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml | 2 +- tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index f2c49145f..02cdc6518 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -620,7 +620,7 @@ - path: miniwdl_run/wdl/tasks/species_typing/task_sonneityping.wdl md5sum: aeb12cf9a0db4e34f2aecbaba51c30fb - path: miniwdl_run/wdl/tasks/species_typing/task_tbprofiler.wdl - md5sum: 0cc3554c6af825e450398f5cdcec807e + md5sum: e486a508ffbfbf300ad64892d82ddde6 - path: miniwdl_run/wdl/tasks/species_typing/task_ts_mlst.wdl md5sum: d49ae0b02e798af0636eb2721bb434b4 - path: miniwdl_run/wdl/tasks/task_versioning.wdl diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index 4c84265e2..37e02a463 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -586,7 +586,7 @@ - path: miniwdl_run/wdl/tasks/species_typing/task_sonneityping.wdl md5sum: aeb12cf9a0db4e34f2aecbaba51c30fb - path: miniwdl_run/wdl/tasks/species_typing/task_tbprofiler.wdl - md5sum: 0cc3554c6af825e450398f5cdcec807e + md5sum: e486a508ffbfbf300ad64892d82ddde6 - path: miniwdl_run/wdl/tasks/species_typing/task_ts_mlst.wdl md5sum: d49ae0b02e798af0636eb2721bb434b4 - path: miniwdl_run/wdl/tasks/task_versioning.wdl From 008514806f0aceae02237dc2bc9399096e6a857b Mon Sep 17 00:00:00 2001 From: Sage Wright <40403716+sage-wright@users.noreply.github.com> Date: Fri, 15 Dec 2023 14:57:55 -0500 Subject: [PATCH 19/21] Add branch name to versioning task --- tasks/task_versioning.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/task_versioning.wdl b/tasks/task_versioning.wdl index 6636b76ba..89cdc0c26 100644 --- a/tasks/task_versioning.wdl +++ b/tasks/task_versioning.wdl @@ -9,7 +9,7 @@ task version_capture { volatile: true } command { - PHB_Version="PHB v1.2.1" + PHB_Version="PHB v1.2.1: smw-tb-2023-10-25-dev branch" ~{default='' 'export TZ=' + timezone} date +"%Y-%m-%d" > TODAY echo "$PHB_Version" > PHB_VERSION From 2481b1729015a2492ba06c8aefeb42da1597e216 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Fri, 29 Dec 2023 18:18:11 +0000 Subject: [PATCH 20/21] version reversion for merge --- tasks/task_versioning.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/task_versioning.wdl b/tasks/task_versioning.wdl index 89cdc0c26..6636b76ba 100644 --- a/tasks/task_versioning.wdl +++ b/tasks/task_versioning.wdl @@ -9,7 +9,7 @@ task version_capture { volatile: true } command { - PHB_Version="PHB v1.2.1: smw-tb-2023-10-25-dev branch" + PHB_Version="PHB v1.2.1" ~{default='' 'export TZ=' + timezone} date +"%Y-%m-%d" > TODAY echo "$PHB_Version" > PHB_VERSION From fec38cf7b4eaae95ddeb8cd9b59f01dd77093980 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Fri, 29 Dec 2023 18:33:24 +0000 Subject: [PATCH 21/21] update checksums --- tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml | 6 +++--- tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index bb27c3345..9a2f4de0d 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -620,7 +620,7 @@ - path: miniwdl_run/wdl/tasks/species_typing/task_sonneityping.wdl md5sum: aeb12cf9a0db4e34f2aecbaba51c30fb - path: miniwdl_run/wdl/tasks/species_typing/task_tbprofiler.wdl - md5sum: e486a508ffbfbf300ad64892d82ddde6 + md5sum: a90fc52112a8333361f96e50b316d03b - path: miniwdl_run/wdl/tasks/species_typing/task_ts_mlst.wdl md5sum: d49ae0b02e798af0636eb2721bb434b4 - path: miniwdl_run/wdl/tasks/task_versioning.wdl @@ -634,9 +634,9 @@ - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl md5sum: 4106837e51f6445e02776e0a74606ed5 - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl - md5sum: c5bfe3dd3c5138d0ab6002a3fb93a139 + md5sum: 6da70123ba3fd1a3ec5434ef21a4c0cb - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl - md5sum: ed9b8b3084027873fd92f1e76a649741 + md5sum: 90eb6ac7463058a81da77120aa45138b - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_pe.wdl contains: ["version", "QC", "output"] - path: miniwdl_run/workflow.log diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index 84ade8058..692d140c3 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -586,7 +586,7 @@ - path: miniwdl_run/wdl/tasks/species_typing/task_sonneityping.wdl md5sum: aeb12cf9a0db4e34f2aecbaba51c30fb - path: miniwdl_run/wdl/tasks/species_typing/task_tbprofiler.wdl - md5sum: e486a508ffbfbf300ad64892d82ddde6 + md5sum: a90fc52112a8333361f96e50b316d03b - path: miniwdl_run/wdl/tasks/species_typing/task_ts_mlst.wdl md5sum: d49ae0b02e798af0636eb2721bb434b4 - path: miniwdl_run/wdl/tasks/task_versioning.wdl @@ -600,9 +600,9 @@ - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl md5sum: 4106837e51f6445e02776e0a74606ed5 - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl - md5sum: 61585a6028465c3f30f3022d55818211 + md5sum: e2fbc89ced1fd8e44106e8c12dd9c129 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl - md5sum: ed9b8b3084027873fd92f1e76a649741 + md5sum: 90eb6ac7463058a81da77120aa45138b - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_se.wdl md5sum: 53d322d895837c0bcb049786572e944d - path: miniwdl_run/workflow.log