diff --git a/modules/nf-core/tcoffee/consensus/environment.yml b/modules/nf-core/tcoffee/consensus/environment.yml
new file mode 100644
index 00000000000..f5c828409f7
--- /dev/null
+++ b/modules/nf-core/tcoffee/consensus/environment.yml
@@ -0,0 +1,8 @@
+name: "tcoffee_consensus"
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::t-coffee=13.46.0.919e8c6b
+  - conda-forge::pigz=2.8
diff --git a/modules/nf-core/tcoffee/consensus/main.nf b/modules/nf-core/tcoffee/consensus/main.nf
new file mode 100644
index 00000000000..666c1ee0f7f
--- /dev/null
+++ b/modules/nf-core/tcoffee/consensus/main.nf
@@ -0,0 +1,73 @@
+// Computes a consensus multiple sequence alignment with T-Coffee from the
+// alignments staged as `aln`, optionally using the guide tree staged as `tree`.
+// With `compress` set, the result is emitted as ${prefix}.aln.gz via pigz.
+process TCOFFEE_CONSENSUS {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'oras://community.wave.seqera.io/library/t-coffee_pigz:f47b85d70360f1a0':
+        'community.wave.seqera.io/library/t-coffee_pigz:6c9b2f8b97ee55e5' }"
+
+
+    input:
+    tuple val(meta) , path(aln)
+    tuple val(meta2), path(tree)
+    val(compress)
+
+    output:
+    tuple val(meta), path("*.{aln,aln.gz}"), emit: alignment
+    path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Pass -usetree only when a guide tree was provided
+    def tree_args = tree ? "-usetree $tree" : ""
+    // In compressed mode t_coffee is given "-outfile stdout" and piped into pigz;
+    // the `[ -f stdout ]` branch below compresses a literal file named stdout instead
+    def outfile = compress ? "stdout" : "${prefix}.aln"
+    def write_output = compress ? " | pigz -cp ${task.cpus} > ${prefix}.aln.gz" : ""
+    // NOTE(review): the pigz entry in END_VERSIONS ends in '))'. The second ')'
+    // sits outside the command substitution, so it is copied literally into
+    // versions.yml. Dropping it changes the versions.yml md5 stored in
+    // tests/main.nf.test.snap, so regenerate the snapshot together with that fix.
+    """
+    export TEMP='./'
+    t_coffee -aln ${aln} \
+        $tree_args \
+        $args \
+        -thread ${task.cpus} \
+        -outfile $outfile \
+        $write_output
+
+    if [ -f stdout ] && [ "$compress" = true ]; then
+        pigz -cp ${task.cpus} < stdout > ${prefix}.aln.gz
+        rm stdout
+    fi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}')
+        pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ))
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // The stub still calls t_coffee and pigz to fill in versions.yml
+    """
+    export TEMP='./'
+    touch ${prefix}.aln${compress ? '.gz':''}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}')
+        pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ))
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/tcoffee/consensus/meta.yml b/modules/nf-core/tcoffee/consensus/meta.yml
new file mode 100644
index 00000000000..54d608300a0
--- /dev/null
+++ b/modules/nf-core/tcoffee/consensus/meta.yml
@@ -0,0 +1,63 @@
+name: tcoffee_consensus
+description: Computes a consensus alignment using T_COFFEE
+keywords:
+  - alignment
+  - MSA
+  - genomics
+tools:
+  - tcoffee:
+      description: "A collection of tools for Computing, Evaluating and Manipulating Multiple Alignments of DNA, RNA, Protein Sequences and Structures."
+      homepage: "http://www.tcoffee.org/Projects/tcoffee/"
+      documentation: "https://tcoffee.readthedocs.io/en/latest/tcoffee_main_documentation.html"
+      tool_dev_url: "https://github.com/cbcrg/tcoffee"
+      doi: "10.1006/jmbi.2000.4042"
+      licence: ["GPL v3"]
+  - pigz:
+      description: "Parallel implementation of the gzip algorithm."
+      homepage: "https://zlib.net/pigz/"
+      documentation: "https://zlib.net/pigz/pigz.pdf"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1' ]`
+  - aln:
+      type: file
+      description: List of multiple sequence alignments in FASTA format to be used to compute the consensus
+      pattern: "*.{fa,fasta,aln}"
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing tree information
+        e.g. `[ id:'test_tree']`
+  - tree:
+      type: file
+      description: Input guide tree in Newick format
+      pattern: "*.{dnd}"
+
+  - compress:
+      type: boolean
+      description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression. Compression is done using pigz, and is multithreaded.
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1' ]`
+
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - alignment:
+      type: file
+      description: Consensus alignment in FASTA format, gzip-compressed when compress is true
+      pattern: "*.{aln,aln.gz}"
+
+authors:
+  - "@luisas"
+maintainers:
+  - "@luisas"
diff --git a/modules/nf-core/tcoffee/consensus/tests/main.nf.test b/modules/nf-core/tcoffee/consensus/tests/main.nf.test
new file mode 100644
index 00000000000..78aa567bf2d
--- /dev/null
+++ b/modules/nf-core/tcoffee/consensus/tests/main.nf.test
@@ -0,0 +1,132 @@
+nextflow_process {
+
+    name "Test Process TCOFFEE_CONSENSUS"
+    script "../main.nf"
+    process "TCOFFEE_CONSENSUS"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "tcoffee"
+    tag "tcoffee/consensus"
+    tag "tcoffee/align"
+    tag "pigz"
+    tag "famsa/guidetree"
+    tag "famsa/align"
+
+    config "./sequence.config"
+
+    setup {
+        run("FAMSA_GUIDETREE") {
+            script "../../../famsa/guidetree/main.nf"
+            process {
+                """
+                input[0] = [ [ id:'test' ],
+                    file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true)
+                ]
+                """
+            }
+        }
+        run("FAMSA_ALIGN") {
+            script "../../../famsa/align/main.nf"
+            process {
+                """
+                input[0] = [ [ id:'test' ],
+                    file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true)
+                ]
+                input[1] = [[:],[]]
+                input[2] = false
+
+                """
+            }
+        }
+        run("TCOFFEE_ALIGN") {
+            script "../../../tcoffee/align/main.nf"
+            process {
+                """
+                input[0] = [ [ id:'test' ],
+                    file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true)
+                ]
+                input[1] = [[:],[]]
+                input[2] = [[:],[],[]]
+                input[3] = false
+
+                """
+            }
+        }
+    }
+
+    test("consensus - no tree - uncompressed - seatoxin") {
+
+        when {
+            process {
+                """
+                msas = FAMSA_ALIGN.out.alignment.mix(TCOFFEE_ALIGN.out.alignment).groupTuple()
+                input[0] = msas
+                input[1] = [[:],[]]
+                input[2] = false
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.alignment,
+                    process.out.versions
+                    ).match()
+                }
+            )
+        }
+    }
+
+    test("consensus - tree - compressed - seatoxin") {
+
+        when {
+            process {
+                """
+                msas = FAMSA_ALIGN.out.alignment.mix(TCOFFEE_ALIGN.out.alignment).groupTuple()
+                input[0] = msas
+                input[1] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test'], tree]}
+                input[2] = true
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.alignment,
+                    process.out.versions
+                    ).match()
+                }
+            )
+        }
+    }
+
+
+    test("consensus - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                msas = FAMSA_ALIGN.out.alignment.mix(TCOFFEE_ALIGN.out.alignment).groupTuple()
+                input[0] = msas
+                input[1] = [[:],[]]
+                input[2] = false
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match()}
+            )
+        }
+    }
+
+}
diff --git a/modules/nf-core/tcoffee/consensus/tests/main.nf.test.snap b/modules/nf-core/tcoffee/consensus/tests/main.nf.test.snap
new file mode 100644
index 00000000000..a0fe4f992ac
--- /dev/null
+++ b/modules/nf-core/tcoffee/consensus/tests/main.nf.test.snap
@@ -0,0 +1,63 @@
+{
+    "consensus - no tree - uncompressed - seatoxin": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "consensus.aln:md5,ed7fb1f7b7a9cd66e9b0c9d60d1b0e52"
+                ]
+            ],
+            [
+                "versions.yml:md5,79d4f7ac70fab29f8cd0a18a4d3f76d1"
+            ]
+        ],
+        "timestamp": "2024-09-04T13:17:59.621521"
+    },
+    "consensus - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "consensus.aln:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,79d4f7ac70fab29f8cd0a18a4d3f76d1"
+                ],
+                "alignment": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "consensus.aln:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,79d4f7ac70fab29f8cd0a18a4d3f76d1"
+                ]
+            }
+        ],
+        "timestamp": "2024-09-04T13:18:15.69498"
+    },
+    "consensus - tree - compressed - seatoxin": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "consensus.aln.gz:md5,ed7fb1f7b7a9cd66e9b0c9d60d1b0e52"
+                ]
+            ],
+            [
+                "versions.yml:md5,79d4f7ac70fab29f8cd0a18a4d3f76d1"
+            ]
+        ],
+        "timestamp": "2024-09-04T13:18:08.240517"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/tcoffee/consensus/tests/sequence.config b/modules/nf-core/tcoffee/consensus/tests/sequence.config
new file mode 100644
index 00000000000..b23494c3cfe
--- /dev/null
+++ b/modules/nf-core/tcoffee/consensus/tests/sequence.config
@@ -0,0 +1,11 @@
+process {
+
+    withName: "TCOFFEE_ALIGN"{
+        ext.prefix = "tcoffee_test"
+        ext.args = { "-output fasta_aln" }
+    }
+    withName: "TCOFFEE_CONSENSUS"{
+        ext.args = { "-output fasta_aln" }
+        ext.prefix = "consensus"
+    }
+}
diff --git a/modules/nf-core/tcoffee/consensus/tests/tags.yml b/modules/nf-core/tcoffee/consensus/tests/tags.yml
new file mode 100644
index 00000000000..f3eb4719b7c
--- /dev/null
+++ b/modules/nf-core/tcoffee/consensus/tests/tags.yml
@@ -0,0 +1,2 @@
+tcoffee/consensus:
+  - "modules/nf-core/tcoffee/consensus/**"