Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TheiaProk] Add emmtyper task for Streptococcus pyogenes #524

Merged
merged 6 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 22 additions & 12 deletions tasks/species_typing/streptococcus/task_emmtyper.wdl
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
version 1.0

# Task adapted, with some modifications, from task_emmtyper.wdl in Neranjan Perera's GAS_identification repository:
# https://github.com/neranjan007/GAS_identification/blob/454ef3b0cc8a90950b48342cde87136962f9adb1/tasks/task_emmtyper.wdl

task emmtyper {
meta {
description: "emm-typing of Streptococcus pyogenes assemblies"
Expand Down Expand Up @@ -37,23 +40,30 @@ task emmtyper {
}
command <<<
echo $(emmtyper --version 2>&1) | sed 's/^.*emmtyper v//' | tee VERSION

emmtyper \
~{'--workflow' + wf} \
~{'--cluster-distance' + cluster_distance} \
~{'--percent-identity' + percid} \
~{'--culling-limit' + culling_limit} \
~{'--mismatch' + mismatch} \
~{'--align-diff' + align_diff} \
~{'--gap' + gap} \
~{'--min-perfect' + min_perfect} \
~{'--min-good' + min_good} \
~{'--max-size' + max_size} \
~{'--workflow ' + wf} \
~{'--cluster-distance ' + cluster_distance} \
~{'--percent-identity ' + percid} \
~{'--culling-limit ' + culling_limit} \
~{'--mismatch ' + mismatch} \
~{'--align-diff ' + align_diff} \
~{'--gap ' + gap} \
~{'--min-perfect ' + min_perfect} \
~{'--min-good ' + min_good} \
~{'--max-size ' + max_size} \
--output-format verbose \
~{assembly} \
> ~{samplename}.tsv
> ~{samplename}_emmtyper.tsv

# emm type is in column 4 for verbose output format
awk -F "\t" '{print $4}' ~{samplename}_emmtyper.tsv > EMM_TYPE
>>>
output {
File emmtyper_results = "~{samplename}.tsv"
String emmtyper_emm_type = read_string("EMM_TYPE")
File emmtyper_results_tsv = "~{samplename}_emmtyper.tsv"
String emmtyper_version = read_string("VERSION")
String emmtyper_docker = docker
}
runtime {
docker: "~{docker}"
Expand Down
8 changes: 8 additions & 0 deletions tasks/utilities/data_export/task_broad_terra_tools.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,10 @@ task export_taxon_tables {
String? seroba_ariba_serotype
String? seroba_ariba_identity
File? seroba_details
String? emmtyper_emm_type
File? emmtyper_results_tsv
String? emmtyper_version
String? emmtyper_docker
String? emmtypingtool_emm_type
File? emmtypingtool_results_xml
String? emmtypingtool_version
Expand Down Expand Up @@ -722,6 +726,10 @@ task export_taxon_tables {
"seroba_ariba_serotype": "~{seroba_ariba_serotype}",
"seroba_ariba_identity": "~{seroba_ariba_identity}",
"seroba_details": "~{seroba_details}",
"emmtyper_emm_type": "~{emmtyper_emm_type}",
"emmtyper_results_tsv": "~{emmtyper_results_tsv}",
"emmtyper_version": "~{emmtyper_version}",
"emmtyper_docker": "~{emmtyper_docker}",
"emmtypingtool_emm_type": "~{emmtypingtool_emm_type}",
"emmtypingtool_reults_xml": "~{emmtypingtool_results_xml}",
"emmtypingtool_version": "~{emmtypingtool_version}",
Expand Down
6 changes: 3 additions & 3 deletions tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml
Original file line number Diff line number Diff line change
Expand Up @@ -631,11 +631,11 @@
- path: miniwdl_run/wdl/tasks/taxon_id/contamination/task_midas.wdl
md5sum: 64caaaff5910ac0036e2659434500962
- path: miniwdl_run/wdl/tasks/utilities/data_export/task_broad_terra_tools.wdl
md5sum: 52556169654cfa3e5de47c1987e83475
md5sum: 14565031f96d01ee6480bb0f9d19551d
- path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl
md5sum: 0b4ef1f3f2711a0460050a1a09d44911
md5sum: 646e726beb68fc61f84a428bf2fb7244
- path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl
md5sum: 6c125054324cd3597f8291baefb54694
md5sum: 43367523b9140ca0d2ac15869046343c
- path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_pe.wdl
contains: ["version", "QC", "output"]
- path: miniwdl_run/workflow.log
Expand Down
6 changes: 3 additions & 3 deletions tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml
Original file line number Diff line number Diff line change
Expand Up @@ -594,11 +594,11 @@
- path: miniwdl_run/wdl/tasks/taxon_id/contamination/task_midas.wdl
md5sum: 64caaaff5910ac0036e2659434500962
- path: miniwdl_run/wdl/tasks/utilities/data_export/task_broad_terra_tools.wdl
md5sum: 52556169654cfa3e5de47c1987e83475
md5sum: 14565031f96d01ee6480bb0f9d19551d
- path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl
md5sum: e2015675a4f53e0e4492e8c6a43dcb37
md5sum: 347c054f9850e885e6a130d1655765d7
- path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl
md5sum: 6c125054324cd3597f8291baefb54694
md5sum: 43367523b9140ca0d2ac15869046343c
- path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_se.wdl
md5sum: 963090bb29184f61c7025e2bc487de4b
- path: miniwdl_run/workflow.log
Expand Down
9 changes: 9 additions & 0 deletions workflows/theiaprok/wf_theiaprok_fasta.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,10 @@ workflow theiaprok_fasta {
seroba_ariba_serotype = merlin_magic.seroba_ariba_serotype,
seroba_ariba_identity = merlin_magic.seroba_ariba_identity,
seroba_details = merlin_magic.seroba_details,
emmtyper_emm_type = merlin_magic.emmtyper_emm_type,
emmtyper_results_tsv = merlin_magic.emmtyper_results_tsv,
emmtyper_version = merlin_magic.emmtyper_version,
emmtyper_docker = merlin_magic.emmtyper_docker,
pasty_serogroup = merlin_magic.pasty_serogroup,
pasty_serogroup_coverage = merlin_magic.pasty_serogroup_coverage,
pasty_serogroup_fragments = merlin_magic.pasty_serogroup_fragments,
Expand Down Expand Up @@ -678,6 +682,11 @@ workflow theiaprok_fasta {
String? poppunk_GPS_db_version = merlin_magic.poppunk_GPS_db_version
String? poppunk_version = merlin_magic.poppunk_version
String? poppunk_docker = merlin_magic.poppunk_docker
# Streptococcus pyogenes Typing
String? emmtyper_emm_type = merlin_magic.emmtyper_emm_type
File? emmtyper_results_tsv = merlin_magic.emmtyper_results_tsv
String? emmtyper_version = merlin_magic.emmtyper_version
String? emmtyper_docker = merlin_magic.emmtyper_docker
# Haemophilus influenzae Typing
String? hicap_serotype = merlin_magic.hicap_serotype
String? hicap_genes = merlin_magic.hicap_genes
Expand Down
8 changes: 8 additions & 0 deletions workflows/theiaprok/wf_theiaprok_illumina_pe.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,10 @@ workflow theiaprok_illumina_pe {
seroba_ariba_serotype = merlin_magic.seroba_ariba_serotype,
seroba_ariba_identity = merlin_magic.seroba_ariba_identity,
seroba_details = merlin_magic.seroba_details,
emmtyper_emm_type = merlin_magic.emmtyper_emm_type,
emmtyper_results_tsv = merlin_magic.emmtyper_results_tsv,
emmtyper_version = merlin_magic.emmtyper_version,
emmtyper_docker = merlin_magic.emmtyper_docker,
emmtypingtool_emm_type = merlin_magic.emmtypingtool_emm_type,
emmtypingtool_results_xml = merlin_magic.emmtypingtool_results_xml,
emmtypingtool_version = merlin_magic.emmtypingtool_version,
Expand Down Expand Up @@ -973,6 +977,10 @@ workflow theiaprok_illumina_pe {
String? seroba_ariba_identity = merlin_magic.seroba_ariba_identity
File? seroba_details = merlin_magic.seroba_details
# Streptococcus pyogenes Typing
String? emmtyper_emm_type = merlin_magic.emmtyper_emm_type
File? emmtyper_results_tsv = merlin_magic.emmtyper_results_tsv
String? emmtyper_version = merlin_magic.emmtyper_version
String? emmtyper_docker = merlin_magic.emmtyper_docker
String? emmtypingtool_emm_type = merlin_magic.emmtypingtool_emm_type
File? emmtypingtool_results_xml = merlin_magic.emmtypingtool_results_xml
String? emmtypingtool_version = merlin_magic.emmtypingtool_version
Expand Down
9 changes: 9 additions & 0 deletions workflows/theiaprok/wf_theiaprok_illumina_se.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,10 @@ workflow theiaprok_illumina_se {
agrvate_agr_num_frameshifts = merlin_magic.agrvate_agr_num_frameshifts,
agrvate_version = merlin_magic.agrvate_version,
agrvate_docker = merlin_magic.agrvate_docker,
emmtyper_emm_type = merlin_magic.emmtyper_emm_type,
emmtyper_results_tsv = merlin_magic.emmtyper_results_tsv,
emmtyper_version = merlin_magic.emmtyper_version,
emmtyper_docker = merlin_magic.emmtyper_docker,
midas_docker = read_QC_trim.midas_docker,
midas_report = read_QC_trim.midas_report,
midas_primary_genus = read_QC_trim.midas_primary_genus,
Expand Down Expand Up @@ -899,6 +903,11 @@ workflow theiaprok_illumina_se {
String? poppunk_GPS_db_version = merlin_magic.poppunk_GPS_db_version
String? poppunk_version = merlin_magic.poppunk_version
String? poppunk_docker = merlin_magic.poppunk_docker
# Streptococcus pyogenes Typing
String? emmtyper_emm_type = merlin_magic.emmtyper_emm_type
File? emmtyper_results_tsv = merlin_magic.emmtyper_results_tsv
String? emmtyper_version = merlin_magic.emmtyper_version
String? emmtyper_docker = merlin_magic.emmtyper_docker
# Haemophilus influenzae Typing
String? hicap_serotype = merlin_magic.hicap_serotype
String? hicap_genes = merlin_magic.hicap_genes
Expand Down
9 changes: 9 additions & 0 deletions workflows/theiaprok/wf_theiaprok_ont.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,10 @@ workflow theiaprok_ont {
agrvate_agr_num_frameshifts = merlin_magic.agrvate_agr_num_frameshifts,
agrvate_version = merlin_magic.agrvate_version,
agrvate_docker = merlin_magic.agrvate_docker,
emmtyper_emm_type = merlin_magic.emmtyper_emm_type,
emmtyper_results_tsv = merlin_magic.emmtyper_results_tsv,
emmtyper_version = merlin_magic.emmtyper_version,
emmtyper_docker = merlin_magic.emmtyper_docker,
pasty_serogroup = merlin_magic.pasty_serogroup,
pasty_serogroup_coverage = merlin_magic.pasty_serogroup_coverage,
pasty_serogroup_fragments = merlin_magic.pasty_serogroup_fragments,
Expand Down Expand Up @@ -882,6 +886,11 @@ workflow theiaprok_ont {
String? poppunk_GPS_db_version = merlin_magic.poppunk_GPS_db_version
String? poppunk_version = merlin_magic.poppunk_version
String? poppunk_docker = merlin_magic.poppunk_docker
# Streptococcus pyogenes Typing
String? emmtyper_emm_type = merlin_magic.emmtyper_emm_type
File? emmtyper_results_tsv = merlin_magic.emmtyper_results_tsv
String? emmtyper_version = merlin_magic.emmtyper_version
String? emmtyper_docker = merlin_magic.emmtyper_docker
# Haemophilus influenzae Typing
String? hicap_serotype = merlin_magic.hicap_serotype
String? hicap_genes = merlin_magic.hicap_genes
Expand Down
32 changes: 32 additions & 0 deletions workflows/utilities/wf_merlin_magic.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import "../../tasks/species_typing/salmonella/task_sistr.wdl" as sistr_task
import "../../tasks/species_typing/staphylococcus/task_agrvate.wdl" as agrvate_task
import "../../tasks/species_typing/staphylococcus/task_spatyper.wdl" as spatyper_task
import "../../tasks/species_typing/staphylococcus/task_staphopiasccmec.wdl" as staphopia_sccmec_task
import "../../tasks/species_typing/streptococcus/task_emmtyper.wdl" as emmtyper_task
import "../../tasks/species_typing/streptococcus/task_emmtypingtool.wdl" as emmtypingtool_task
import "../../tasks/species_typing/streptococcus/task_pbptyper.wdl" as pbptyper
import "../../tasks/species_typing/streptococcus/task_poppunk_streppneumo.wdl" as poppunk_spneumo
Expand All @@ -51,6 +52,17 @@ workflow merlin_magic {
Int? pasty_min_coverage
String? hicap_docker_image
String? pasty_docker_image
String? emmtyper_wf
Int? emmtyper_cluster_distance
Int? emmtyper_percid
Int? emmtyper_culling_limit
Int? emmtyper_mismatch
Int? emmtyper_align_diff
Int? emmtyper_gap
Int? emmtyper_min_perfect
Int? emmtyper_min_good
Int? emmtyper_max_size
String? emmtyper_docker_image
String? emmtypingtool_docker_image
String? shigeifinder_docker_image
String? shigatyper_docker_image
Expand Down Expand Up @@ -338,6 +350,22 @@ workflow merlin_magic {
}
}
if (merlin_tag == "Streptococcus pyogenes") {
call emmtyper_task.emmtyper {
input:
assembly = assembly,
samplename = samplename,
docker = emmtyper_docker_image,
wf = emmtyper_wf,
cluster_distance = emmtyper_cluster_distance,
percid = emmtyper_percid,
culling_limit = emmtyper_culling_limit,
mismatch = emmtyper_mismatch,
align_diff = emmtyper_align_diff,
gap = emmtyper_gap,
min_perfect = emmtyper_min_perfect,
min_good = emmtyper_min_good,
max_size = emmtyper_max_size,
}
if (paired_end && !ont_data) {
call emmtypingtool_task.emmtypingtool {
input:
Expand Down Expand Up @@ -668,6 +696,10 @@ workflow merlin_magic {
String? seroba_ariba_identity = seroba_task.seroba_ariba_identity
File? seroba_details = seroba_task.seroba_details
# Streptococcus pyogenes Typing
String? emmtyper_emm_type = emmtyper.emmtyper_emm_type
File? emmtyper_results_tsv = emmtyper.emmtyper_results_tsv
String? emmtyper_version = emmtyper.emmtyper_version
String? emmtyper_docker = emmtyper.emmtyper_docker
String? emmtypingtool_emm_type = emmtypingtool.emmtypingtool_emm_type
File? emmtypingtool_results_xml = emmtypingtool.emmtypingtool_results_xml
String? emmtypingtool_version = emmtypingtool.emmtypingtool_version
Expand Down