From f190668ae42bcc6159772d16b1039e728847fb6c Mon Sep 17 00:00:00 2001 From: luisas Date: Tue, 17 Sep 2024 11:30:53 +0200 Subject: [PATCH 1/9] add upp --- CHANGELOG.md | 1 + CITATIONS.md | 4 + conf/modules.config | 2 +- docs/usage.md | 1 + modules.json | 117 +++++++++++++----- modules/nf-core/upp/align/environment.yml | 6 + modules/nf-core/upp/align/main.nf | 71 +++++++++++ modules/nf-core/upp/align/meta.yml | 57 +++++++++ modules/nf-core/upp/align/tests/main.nf.test | 96 ++++++++++++++ .../nf-core/upp/align/tests/main.nf.test.snap | 101 +++++++++++++++ .../nf-core/upp/align/tests/nextflow.config | 7 ++ modules/nf-core/upp/align/tests/tags.yml | 2 + subworkflows/local/align.nf | 21 +++- 13 files changed, 456 insertions(+), 30 deletions(-) create mode 100644 modules/nf-core/upp/align/environment.yml create mode 100644 modules/nf-core/upp/align/main.nf create mode 100644 modules/nf-core/upp/align/meta.yml create mode 100644 modules/nf-core/upp/align/tests/main.nf.test create mode 100644 modules/nf-core/upp/align/tests/main.nf.test.snap create mode 100644 modules/nf-core/upp/align/tests/nextflow.config create mode 100644 modules/nf-core/upp/align/tests/tags.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 45f016b4..03bec162 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ Initial release of nf-core/multiplesequencealign, created with the [nf-core](htt - [[#139](https://github.com/nf-core/multiplesequencealign/pull/139)] - Add Foldmason. - [[#146](https://github.com/nf-core/multiplesequencealign/pull/146)] - Only show additional process tags when they exists and use the same ubuntu version in all modules. - [[#145](https://github.com/nf-core/multiplesequencealign/pull/145)] - Add consensus MSA. +- [[#150](https://github.com/nf-core/multiplesequencealign/pull/150)] - Add UPP module. 
### `Fixed` diff --git a/CITATIONS.md b/CITATIONS.md index 76048d4c..f2f33cbf 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -56,6 +56,10 @@ > Notredame C, Higgins DG, Heringa J. T-Coffee: A novel method for fast and accurate multiple sequence alignment. J Mol Biol. 2000 Sep 8;302(1):205-17. doi: 10.1006/jmbi.2000.4042. PMID: 10964570. +- [UPP](https://academic.oup.com/bioinformatics/article/39/1/btad007/6982552) + + > Park M, Ivanovic S, Chu G, Shen C, Warnow T. UPP2: fast and accurate alignment of datasets with fragmentary sequences. Bioinformatics. 2023 Jan 1;39(1):btad007. doi: 10.1093/bioinformatics/btad007. PMID: 36625535; PMCID: PMC9846425. + ## Python packages - [Biopython](https://pubmed.ncbi.nlm.nih.gov/19304878/) diff --git a/conf/modules.config b/conf/modules.config index 541db20c..0fdc9259 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -122,7 +122,7 @@ withName: "CREATE_TCOFFEETEMPLATE" { ext.prefix = { "${meta.id}" } } - withName: "CLUSTALO_ALIGN|FAMSA_ALIGN|FOLDMASON_EASYMSA|KALIGN_ALIGN|LEARNMSA_ALIGN|MAFFT|MAGUS_ALIGN|MUSCLE5_SUPER5|REGRESSIVE|TCOFFEE_ALIGN|TCOFFEE3D_ALIGN" { + withName: "CLUSTALO_ALIGN|FAMSA_ALIGN|FOLDMASON_EASYMSA|KALIGN_ALIGN|LEARNMSA_ALIGN|MAFFT|MAGUS_ALIGN|MUSCLE5_SUPER5|REGRESSIVE|TCOFFEE_ALIGN|TCOFFEE3D_ALIGN|UPP_ALIGN" { tag = { [ "${meta.id}", diff --git a/docs/usage.md b/docs/usage.md index 323b99c0..37f35ccc 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -50,6 +50,7 @@ The available ALIGN methods are listed below (those that accept guide trees are - [MAGUS](https://github.com/vlasmirnov/MAGUS) (accepts guide tree) - [MUSCLE5](https://drive5.com/muscle5/manual/) - [TCOFFEE](https://tcoffee.readthedocs.io/en/latest/index.html) (accepts guide tree) +- [UPP](https://github.com/smirarab/sepp) (accepts guide tree) **SEQUENCE- and STRUCTURE-BASED** (require both fasta and structures as input): diff --git a/modules.json b/modules.json index ccc334ae..0894533d 100644 --- a/modules.json +++ b/modules.json @@ 
-8,123 +8,178 @@ "clustalo/align": { "branch": "master", "git_sha": "c332ea831f95f750be962c4b5de655f7a1e6e245", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "clustalo/guidetree": { "branch": "master", "git_sha": "c332ea831f95f750be962c4b5de655f7a1e6e245", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "csvtk/concat": { "branch": "master", "git_sha": "cfe2a24902bfdfe8132f11461ffda92d257f9f09", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "csvtk/join": { "branch": "master", "git_sha": "614abbf126f287a3068dc86997b2e1b6a93abe20", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/csvtk/join/csvtk-join.diff" }, "famsa/align": { "branch": "master", "git_sha": "e21abdefe46e27b71e0ced2cadc0f2463298aba6", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "famsa/guidetree": { "branch": "master", "git_sha": "e21abdefe46e27b71e0ced2cadc0f2463298aba6", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "foldmason/easymsa": { "branch": "master", "git_sha": "bea7a09bc85c756b8f2befd63311eb1651067073", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "kalign/align": { "branch": "master", "git_sha": "e61e5a13ef49c5595986bd31efb85c3f0709a282", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "learnmsa/align": { "branch": "master", "git_sha": "62007703c84bcfef92ce9e4a57cb1cc382917201", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mafft": { "branch": "master", "git_sha": "64770369d851c45c364e410e052ef9a6c3a7d2bb", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "magus/align": { "branch": "master", "git_sha": "dc37bcdfa78fe3e9ca56e4b85e1621333c7b4301", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "magus/guidetree": { "branch": "master", "git_sha": "dc37bcdfa78fe3e9ca56e4b85e1621333c7b4301", - "installed_by": ["modules"] + 
"installed_by": [ + "modules" + ] }, "mtmalign/align": { "branch": "master", "git_sha": "7bfb142c3729c1c76198c237a614215d92fe935c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "19ca321db5d8bd48923262c2eca6422359633491", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "muscle5/super5": { "branch": "master", "git_sha": "faf557ba56156ac0e5de76a25c1e3df11c944f59", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pigz/compress": { "branch": "master", "git_sha": "c00055a0b13d622b4f1f51a8e5be31deaf99ded7", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pigz/uncompress": { "branch": "master", "git_sha": "c00055a0b13d622b4f1f51a8e5be31deaf99ded7", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/align": { "branch": "master", "git_sha": "03fbf6c89e551bd8d77f3b751fb5c955f75b34c5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/alncompare": { "branch": "master", "git_sha": "faf557ba56156ac0e5de76a25c1e3df11c944f59", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/consensus": { "branch": "master", "git_sha": "8b8d8daa4b7d75ccfb290fcb721a00cc98e23567", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/irmsd": { "branch": "master", "git_sha": "faf557ba56156ac0e5de76a25c1e3df11c944f59", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/seqreformat": { "branch": "master", "git_sha": "c60c14b285b89bdd0607e371417dadb80385ad6e", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/tcs": { "branch": "master", "git_sha": "1cacaceabae75b0c3bc393dee52cb6a5020fcb5c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "untar": { "branch": "master", "git_sha": "4e5f4687318f24ba944a13609d3ea6ebd890737d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] + }, + 
"upp/align": { + "branch": "master", + "git_sha": "3be751e610b332efd94c2e82ddab5b5c65cfe852", + "installed_by": [ + "modules" + ] } } }, @@ -133,20 +188,26 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "d20fb2a9cc3e2835e9d067d1046a63252eb17352", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "2fdce49d30c0254f76bc0f13c55c17455c1251ab", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/upp/align/environment.yml b/modules/nf-core/upp/align/environment.yml new file mode 100644 index 00000000..da0eaa9a --- /dev/null +++ b/modules/nf-core/upp/align/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::sepp=4.5.5 + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/upp/align/main.nf b/modules/nf-core/upp/align/main.nf new file mode 100644 index 00000000..1b254ea4 --- /dev/null +++ b/modules/nf-core/upp/align/main.nf @@ -0,0 +1,71 @@ +process UPP_ALIGN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'oras://community.wave.seqera.io/library/sepp_pigz:d72591720d0277b1': + 'community.wave.seqera.io/library/sepp_pigz:ea6dbc7704a2e251' }" + + input: + tuple val(meta) , path(fasta) + tuple val(meta2), path(tree) + val(compress) + + output: + tuple val(meta), path("*.aln{.gz,}"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def tree_args = tree ? 
"-t $tree" : "" + """ + + if [ "$workflow.containerEngine" = 'singularity' ]; then + export CONDA_PREFIX="/opt/conda/" + export PASTA_TOOLS_DEVDIR="/opt/conda/bin/" + fi + + run_upp.py \\ + $args \\ + -x $task.cpus \\ + -s ${fasta} \\ + -d . \\ + -o ${prefix} \\ + -p ./upp-temporary + + mv ${prefix}_alignment.fasta ${prefix}.aln + + if ${compress}; then + pigz -p ${task.cpus} ${prefix}.aln + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + upp: \$(run_upp.py -v | grep "run_upp" | cut -f2 -d" ") + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + + + if [ "$compress" = true ]; then + echo | gzip > "${prefix}.aln.gz" + else + touch "${prefix}.aln" + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + upp: \$(run_upp.py -v | grep "run_upp" | cut -f2 -d" ") + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ +} diff --git a/modules/nf-core/upp/align/meta.yml b/modules/nf-core/upp/align/meta.yml new file mode 100644 index 00000000..a5470c2c --- /dev/null +++ b/modules/nf-core/upp/align/meta.yml @@ -0,0 +1,57 @@ +name: "upp_align" +description: Aligns protein structures using UPP +keywords: + - alignment + - MSA + - genomics + - structure +tools: + - "upp": + description: "SATe-enabled phylogenetic placement" + homepage: "https://github.com/smirarab/sepp/tree/master" + documentation: "https://github.com/smirarab/sepp/blob/master/README.UPP.md" + tool_dev_url: "https://github.com/smirarab/sepp/tree/master" + doi: "10.1093/bioinformatics/btad007" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - fasta: + type: file + description: Input sequences in FASTA format + pattern: "*.{fa,fasta}" + - meta2: + type: map + description: | + Groovy Map containing tree information + e.g. 
`[ id:'test_tree']` + - tree: + type: file + description: Input guide tree in Newick format + pattern: "*.{dnd}" + - compress: + type: boolean + description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - alignment: + type: file + description: Alignment file, in FASTA format. May be gzipped or uncompressed, depending on if compress is set to true or false + pattern: "*.aln{.gz,}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@luisas" +maintainers: + - "@luisas" diff --git a/modules/nf-core/upp/align/tests/main.nf.test b/modules/nf-core/upp/align/tests/main.nf.test new file mode 100644 index 00000000..7425d984 --- /dev/null +++ b/modules/nf-core/upp/align/tests/main.nf.test @@ -0,0 +1,96 @@ +nextflow_process { + + name "Test Process UPP_ALIGN" + script "../main.nf" + process "UPP_ALIGN" + + tag "modules" + tag "modules_nfcore" + tag "upp" + tag "upp/align" + tag "famsa/guidetree" + + test("fasta - align_sequence - uncompressed") { + config "./nextflow.config" + + when { + process { + """ + input[0] = [ [ id:'test' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + input[1] = [[:],[]] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("fasta - with_tree - compressed") { + config "./nextflow.config" + + setup { + + run("FAMSA_GUIDETREE") { + script "../../../famsa/guidetree/main.nf" + process { + """ + input[0] = [ [ id:'tree' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + + """ + } + } + } + when { + process { + """ + input[0] = [ [ 
id:'test_tree' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + input[1] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_tree'], tree]} + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match()} + ) + } + } + + test("stub") { + config "./nextflow.config" + + options "-stub" + when { + process { + """ + input[0] = [ [ id:'test' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + input[1] = [[:],[]] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match()} + ) + } + } +} diff --git a/modules/nf-core/upp/align/tests/main.nf.test.snap b/modules/nf-core/upp/align/tests/main.nf.test.snap new file mode 100644 index 00000000..d34254a1 --- /dev/null +++ b/modules/nf-core/upp/align/tests/main.nf.test.snap @@ -0,0 +1,101 @@ +{ + "fasta - align_sequence - uncompressed": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln:md5,e6b5291e9cdb40e9b7c72688e4da533b" + ] + ], + "1": [ + "versions.yml:md5,b431bb15ae86dcd4485d921df1752a98" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln:md5,e6b5291e9cdb40e9b7c72688e4da533b" + ] + ], + "versions": [ + "versions.yml:md5,b431bb15ae86dcd4485d921df1752a98" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-17T07:51:30.876772941" + }, + "stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,b431bb15ae86dcd4485d921df1752a98" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,b431bb15ae86dcd4485d921df1752a98" + ] + } + ], + 
"meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-17T07:36:19.135281969" + }, + "fasta - with_tree - compressed": { + "content": [ + { + "0": [ + [ + { + "id": "test_tree" + }, + "test_tree.aln.gz:md5,e6b5291e9cdb40e9b7c72688e4da533b" + ] + ], + "1": [ + "versions.yml:md5,b431bb15ae86dcd4485d921df1752a98" + ], + "alignment": [ + [ + { + "id": "test_tree" + }, + "test_tree.aln.gz:md5,e6b5291e9cdb40e9b7c72688e4da533b" + ] + ], + "versions": [ + "versions.yml:md5,b431bb15ae86dcd4485d921df1752a98" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-17T07:52:05.47226891" + } +} \ No newline at end of file diff --git a/modules/nf-core/upp/align/tests/nextflow.config b/modules/nf-core/upp/align/tests/nextflow.config new file mode 100644 index 00000000..30ae1f46 --- /dev/null +++ b/modules/nf-core/upp/align/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: "UPP_ALIGN" { + ext.args = { "-m amino" } + } + +} diff --git a/modules/nf-core/upp/align/tests/tags.yml b/modules/nf-core/upp/align/tests/tags.yml new file mode 100644 index 00000000..adb26a51 --- /dev/null +++ b/modules/nf-core/upp/align/tests/tags.yml @@ -0,0 +1,2 @@ +upp/align: + - "modules/nf-core/upp/align/**" diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 4b4b9aed..db6ffe0b 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -15,12 +15,13 @@ include { KALIGN_ALIGN } from '../../modules/nf-core/kalign include { LEARNMSA_ALIGN } from '../../modules/nf-core/learnmsa/align/main' include { MAFFT } from '../../modules/nf-core/mafft/main' include { MAGUS_ALIGN } from '../../modules/nf-core/magus/align/main' +include { MTMALIGN_ALIGN } from '../../modules/nf-core/mtmalign/align/main' include { MUSCLE5_SUPER5 } from '../../modules/nf-core/muscle5/super5/main' include { TCOFFEE_ALIGN } from '../../modules/nf-core/tcoffee/align/main' include { TCOFFEE_ALIGN as TCOFFEE3D_ALIGN 
} from '../../modules/nf-core/tcoffee/align/main' include { TCOFFEE_ALIGN as REGRESSIVE_ALIGN } from '../../modules/nf-core/tcoffee/align/main' include { TCOFFEE_CONSENSUS as CONSENSUS } from '../../modules/nf-core/tcoffee/consensus/main' -include { MTMALIGN_ALIGN } from '../../modules/nf-core/mtmalign/align/main' +include { UPP_ALIGN } from '../../modules/nf-core/upp/align/main' workflow ALIGN { take: @@ -90,6 +91,7 @@ workflow ALIGN { regressive: it[0]["aligner"] == "REGRESSIVE" tcoffee: it[0]["aligner"] == "TCOFFEE" tcoffee3d: it[0]["aligner"] == "3DCOFFEE" + upp: it[0]["aligner"] == "UPP" } .set { ch_fasta_trees } @@ -261,6 +263,23 @@ workflow ALIGN { ch_msa = ch_msa.mix(REGRESSIVE_ALIGN.out.alignment) ch_versions = ch_versions.mix(REGRESSIVE_ALIGN.out.versions.first()) + // ----------------- UPP ------------------ + ch_fasta_trees.upp + .multiMap{ + meta, fastafile, treefile -> + fasta: [ meta, fastafile ] + tree: [ meta, treefile ] + } + .set { ch_fasta_trees_upp } + + UPP_ALIGN ( + ch_fasta_trees_upp.fasta, + ch_fasta_trees_upp.tree, + compress + ) + ch_msa = ch_msa.mix(UPP_ALIGN.out.alignment) + ch_versions = ch_versions.mix(UPP_ALIGN.out.versions.first()) + // 2. SEQUENCE + STRUCTURE BASED if(params.templates_suffix == ".pdb"){ From 98a71cbf1f780d5d3422dec38ad76df002609e53 Mon Sep 17 00:00:00 2001 From: luisas Date: Tue, 17 Sep 2024 11:34:35 +0200 Subject: [PATCH 2/9] update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e778141..ac0f2ae3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,7 +35,7 @@ Initial release of nf-core/multiplesequencealign, created with the [nf-core](htt - [[#146](https://github.com/nf-core/multiplesequencealign/pull/146)] - Only show additional process tags when they exists and use the same ubuntu version in all modules. - [[#145](https://github.com/nf-core/multiplesequencealign/pull/145)] - Add consensus MSA. 
- [[#147](https://github.com/nf-core/multiplesequencealign/pull/147)] - Add small testing profile + some fixes of the shiny app. -- [[#150](https://github.com/nf-core/multiplesequencealign/pull/150)] - Add UPP module. +- [[#148](https://github.com/nf-core/multiplesequencealign/pull/148)] - Add UPP module. ### `Fixed` From d8ec33dc419e2b9c4d2a57f35a19e9e9a30af61c Mon Sep 17 00:00:00 2001 From: luisas Date: Tue, 17 Sep 2024 11:36:34 +0200 Subject: [PATCH 3/9] fix prettier --- modules.json | 99 ++++++++++++++++++++++++++++--------- subworkflows/local/align.nf | 2 +- 2 files changed, 77 insertions(+), 24 deletions(-) diff --git a/modules.json b/modules.json index 6061480e..64bfcef6 100644 --- a/modules.json +++ b/modules.json @@ -8,83 +8,115 @@ "clustalo/align": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "clustalo/guidetree": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "csvtk/concat": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "csvtk/join": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/csvtk/join/csvtk-join.diff" }, "famsa/align": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "famsa/guidetree": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "foldmason/easymsa": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "kalign/align": { "branch": "master", 
"git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "learnmsa/align": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mafft": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "magus/align": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "magus/guidetree": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mtmalign/align": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "muscle5/super5": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pigz/compress": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pigz/uncompress": { "branch": "master", @@ -96,37 +128,58 @@ "tcoffee/align": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/alncompare": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/consensus": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/irmsd": { 
"branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/seqreformat": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/tcs": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "untar": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] + }, + "upp/align": { + "branch": "master", + "git_sha": "3be751e610b332efd94c2e82ddab5b5c65cfe852", + "installed_by": [ + "modules" + ] } } }, diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index db6ffe0b..1f3a9b57 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -271,7 +271,7 @@ workflow ALIGN { tree: [ meta, treefile ] } .set { ch_fasta_trees_upp } - + UPP_ALIGN ( ch_fasta_trees_upp.fasta, ch_fasta_trees_upp.tree, From beb05f8d5e8da279cce848dd6f475cfaba7c922a Mon Sep 17 00:00:00 2001 From: luisas Date: Tue, 17 Sep 2024 11:37:36 +0200 Subject: [PATCH 4/9] prettier --- modules.json | 114 +++++++++++++-------------------------------------- 1 file changed, 29 insertions(+), 85 deletions(-) diff --git a/modules.json b/modules.json index 64bfcef6..46f8b5c3 100644 --- a/modules.json +++ b/modules.json @@ -8,178 +8,128 @@ "clustalo/align": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "clustalo/guidetree": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "csvtk/concat": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] 
}, "csvtk/join": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/csvtk/join/csvtk-join.diff" }, "famsa/align": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "famsa/guidetree": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "foldmason/easymsa": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "kalign/align": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "learnmsa/align": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "mafft": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "magus/align": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "magus/guidetree": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "mtmalign/align": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "muscle5/super5": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": 
["modules"] }, "pigz/compress": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "pigz/uncompress": { "branch": "master", "git_sha": "c00055a0b13d622b4f1f51a8e5be31deaf99ded7", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/align": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/alncompare": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/consensus": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/irmsd": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/seqreformat": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/tcs": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "untar": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "upp/align": { "branch": "master", "git_sha": "3be751e610b332efd94c2e82ddab5b5c65cfe852", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -188,26 +138,20 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "d20fb2a9cc3e2835e9d067d1046a63252eb17352", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "2fdce49d30c0254f76bc0f13c55c17455c1251ab", - "installed_by": [ - "subworkflows" - ] 
+ "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} From 3f0c4867baf4c84b112e1e8d73cf60fa5bf563ff Mon Sep 17 00:00:00 2001 From: luisas Date: Tue, 17 Sep 2024 11:44:09 +0200 Subject: [PATCH 5/9] update --- subworkflows/local/align.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 1f3a9b57..2a67f62b 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -263,7 +263,7 @@ workflow ALIGN { ch_msa = ch_msa.mix(REGRESSIVE_ALIGN.out.alignment) ch_versions = ch_versions.mix(REGRESSIVE_ALIGN.out.versions.first()) - // ----------------- UPP ------------------ + // ----------------- UPP ------------------- ch_fasta_trees.upp .multiMap{ meta, fastafile, treefile -> From 01f311cabd814014c1cf0427abda96bd429fa980 Mon Sep 17 00:00:00 2001 From: luisas Date: Tue, 17 Sep 2024 12:13:59 +0200 Subject: [PATCH 6/9] update --- .../local/utils_nfcore_multiplesequencealign_pipeline/main.nf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf index c9cb1811..8b1f7648 100644 --- a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf @@ -395,9 +395,13 @@ class Utils { args = fix_args(tool,args,"REGRESSIVE", "-reg_method", "famsa_msa") args = fix_args(tool,args,"REGRESSIVE", "-reg_nseq", "1000") args = fix_args(tool,args,"REGRESSIVE", "-output", "fasta_aln") + // TCOFFEE args = fix_args(tool,args,"TCOFFEE", "-output", "fasta_aln") + // UPP + args = fix_args(tool,args,"UPP", "-m", "amino") + return args } From 
61347b8ad6777fb043961730c0db1509ab440cf7 Mon Sep 17 00:00:00 2001 From: luisas Date: Tue, 17 Sep 2024 12:20:15 +0200 Subject: [PATCH 7/9] update citation --- .../utils_nfcore_multiplesequencealign_pipeline/main.nf | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf index 8b1f7648..0d1d2f0e 100644 --- a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf @@ -187,13 +187,15 @@ def toolCitationText() { "Clustal Omega (Sievers et al., 2011)", "FAMSA (Deorowicz et al., 2016)", "FastQC (Andrews 2010),", + "Foldmason (Gilchrist et al., 2024)", "Kalign 3 (Lassmann, 2019)", + "learnMSA (Becker & Stanke, 2022)", "MAFFT (Katoh et al., 2002)", + "mTM-align (Dong et al., 2018)", "MultiQC (Ewels et al., 2016)", "Muscle5 (Edgar, 2022)", "T-Coffee (Notredame et al., 2000)", - "learnMSA (Becker & Stanke, 2022)", - "mTM-align (Dong et al., 2018)" + "UPP (Park et al., 2023)" ].join(' ').trim() return citation_text @@ -213,6 +215,7 @@ def toolBibliographyText() { "
  • Lassmann T. Kalign 3: multiple sequence alignment of large data sets. Bioinformatics. 2019 Oct 26;36(6):1928–9. doi: 10.1093/bioinformatics/btz795. Epub ahead of print. PMID: 31665271; PMCID: PMC7703769.
  • ", "
  • Notredame C, Higgins DG, Heringa J. T-Coffee: A novel method for fast and accurate multiple sequence alignment. J Mol Biol. 2000 Sep 8;302(1):205-17. doi: 10.1006/jmbi.2000.4042. PMID: 10964570.
  • ", "
  • O'Sullivan O, Suhre K, Abergel C, Higgins DG, Notredame C. 3DCoffee: combining protein sequences and structures within multiple sequence alignments. J Mol Biol. 2004 Jul 2;340(2):385-95. doi: 10.1016/j.jmb.2004.04.058. PMID: 15201059.
  • ", + "
  • Park M, Ivanovic S, Chu G, Shen C, Warnow T. UPP2: fast and accurate alignment of datasets with fragmentary sequences. Bioinformatics. 2023 Jan 1;39(1):btad007. doi: 10.1093/bioinformatics/btad007. PMID: 36625535; PMCID: PMC9846425.
  • ", "
  • Sievers F, Wilm A, Dineen D, Gibson TJ, Karplus K, Li W, Lopez R, McWilliam H, Remmert M, Söding J, Thompson JD, Higgins DG. Fast, scalable generation of high-quality protein multiple sequence alignments using Clustal Omega. Mol Syst Biol. 2011 Oct 11;7:539. doi: 10.1038/msb.2011.75. PMID: 21988835; PMCID: PMC3261699.
  • " ].join(' ').trim() From 46edf5e6fb21cba1cc73bca74314d9e602ef37c8 Mon Sep 17 00:00:00 2001 From: luisas Date: Tue, 17 Sep 2024 12:27:44 +0200 Subject: [PATCH 8/9] update citation --- .../local/utils_nfcore_multiplesequencealign_pipeline/main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf index 0d1d2f0e..53caed3c 100644 --- a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf @@ -195,7 +195,7 @@ def toolCitationText() { "MultiQC (Ewels et al., 2016)", "Muscle5 (Edgar, 2022)", "T-Coffee (Notredame et al., 2000)", - "UPP (Park et al., 2023)" + "UPP (Park et al., 2023)" ].join(' ').trim() return citation_text @@ -211,6 +211,7 @@ def toolBibliographyText() { "
  • Dong R, Peng Z, Zhang Y, Yang J. mTM-align: an algorithm for fast and accurate multiple protein structure alignment. Bioinformatics. 2018 May 15;34(10):1719-1725. doi: 10.1093/bioinformatics/btx828. PMID: 29281009; PMCID: PMC5946935.
  • ", "
  • Edgar RC. Muscle5: High-accuracy alignment ensembles enable unbiased assessments of sequence homology and phylogeny. Nat Commun. 2022 Nov 15;13(1):6968. doi: 10.1038/s41467-022-34630-w. PMID: 36379955; PMCID: PMC9664440.
  • ", "
  • Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
  • ", + "
  • Cameron L.M. Gilchrist, Milot Mirdita, Martin Steinegger. Multiple Protein Structure Alignment at Scale with FoldMason. bioRxiv 2024.08.01.606130; doi: https://doi.org/10.1101/2024.08.01.606130.
  • ", "
  • Katoh K, Misawa K, Kuma K, Miyata T. MAFFT: a novel method for rapid multiple sequence alignment based on fast Fourier transform. Nucleic Acids Res. 2002 Jul 15;30(14):3059-66. doi: 10.1093/nar/gkf436. PMID: 12136088; PMCID: PMC135756.
  • ", "
  • Lassmann T. Kalign 3: multiple sequence alignment of large data sets. Bioinformatics. 2019 Oct 26;36(6):1928–9. doi: 10.1093/bioinformatics/btz795. Epub ahead of print. PMID: 31665271; PMCID: PMC7703769.
  • ", "
  • Notredame C, Higgins DG, Heringa J. T-Coffee: A novel method for fast and accurate multiple sequence alignment. J Mol Biol. 2000 Sep 8;302(1):205-17. doi: 10.1006/jmbi.2000.4042. PMID: 10964570.
  • ", From 82e44c92649287609eb2bfdb19bd42d43f70d959 Mon Sep 17 00:00:00 2001 From: luisas Date: Tue, 17 Sep 2024 12:32:56 +0200 Subject: [PATCH 9/9] update citation --- .../local/utils_nfcore_multiplesequencealign_pipeline/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf index 53caed3c..4a23f982 100644 --- a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf @@ -187,7 +187,7 @@ def toolCitationText() { "Clustal Omega (Sievers et al., 2011)", "FAMSA (Deorowicz et al., 2016)", "FastQC (Andrews 2010),", - "Foldmason (Gilchrist et al., 2024)" + "Foldmason (Gilchrist et al., 2024)", "Kalign 3 (Lassmann, 2019)", "learnMSA (Becker & Stanke, 2022)", "MAFFT (Katoh et al., 2002)",