diff --git a/CHANGELOG.md b/CHANGELOG.md index 790022d5..8e6ed5e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ Initial release of nf-core/multiplesequencealign, created with the [nf-core](htt [#54](https://github.com/nf-core/multiplesequencealign/issues/54) - Update modules versions from nf-core tools. [#80](https://github.com/nf-core/multiplesequencealign/pull/80) - Update modules versions from nf-core tools with nf-test. [#32](https://github.com/nf-core/multiplesequencealign/issues/32) - Update Stats workflow with nf-core modules for merging. +[#81](https://github.com/nf-core/multiplesequencealign/pull/81) - Update Eval workflow with nf-core modules for merging. ### `Dependencies` diff --git a/bin/parsers.py b/bin/parsers.py index 4b0437e5..17788917 100755 --- a/bin/parsers.py +++ b/bin/parsers.py @@ -15,6 +15,8 @@ def tcoffee_irmsd_parse(input, output): df = df.transpose() # header = ",".join(list(df.columns.str.replace("\s","", regex = True))) # values = ",".join(list(df.iloc[0].tolist())) + # remove all spaces from column names + df.columns = df.columns.str.replace("\s", "", regex=True) df.to_csv(output, index=False) diff --git a/conf/modules.config b/conf/modules.config index e153ba6b..b360484d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -96,7 +96,10 @@ process { } } - withName: ".*EVAL"{ + + withName: 'TCOFFEE_ALNCOMPARE_SP'{ + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.argsaligner_clean}_sp" } + ext.args = "-compare_mode sp" publishDir = [ path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1]}" }, mode: params.publish_dir_mode, @@ -104,6 +107,33 @@ process { ] } + withName: 'TCOFFEE_ALNCOMPARE_TC'{ + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.argsaligner_clean}_tc" } + ext.args = "-compare_mode tc" + publishDir = [ + path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1]}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'TCOFFEE_IRMSD'{ + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.argsaligner_clean}_irmsd" } + publishDir = [ + path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1]}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'PARSE_IRMSD'{ + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.argsaligner_clean}_irmsd" } + publishDir = [ + path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1]}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } withName: "CONCAT_SEQSTATS"{ ext.prefix = { "summary_seqstats" } @@ -114,6 +144,33 @@ process { ] } + withName: "CONCAT_SP"{ + ext.prefix = { "summary_sp" } + publishDir = [ + path: { "${params.outdir}/evaluation/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: "CONCAT_TC"{ + ext.prefix = { "summary_tc" } + publishDir = [ + path: { "${params.outdir}/evaluation/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: "CONCAT_IRMSD"{ + ext.prefix = { "summary_irmsd" } + publishDir = [ + path: { "${params.outdir}/evaluation/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: "CONCAT_SIMSTATS"{ ext.prefix = { "summary_simstats" } publishDir = [ @@ -132,9 +189,11 @@ process { ] } - withName: "MERGE_EVALUATIONS_REPORT"{ + withName: "MERGE_EVAL"{ + ext.prefix = { "complete_summary_eval" } + ext.args = "-f 1,2,3,4,5,6,7" publishDir = [ - path: { "${params.outdir}/summary_report/" }, + path: { "${params.outdir}/evaluation/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -161,6 +220,8 @@ process { ] } + + withName: 'ZIP' { ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.argsaligner_clean}" } publishDir = [ diff --git a/conf/test.config b/conf/test.config index 0ea9a401..20a16507 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,8 +20,8 @@ params { max_time = '6.h' // Stats - skip_stats = false - skip_eval = true + skip_stats = true + skip_eval = false // Input data //input = 'https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/samplesheet/v1.0/samplesheet_test.csv' diff --git a/modules.json b/modules.json index 00ce3c5e..e2f7ee08 100644 --- a/modules.json +++ b/modules.json @@ -75,6 +75,16 @@ "git_sha": "c83c78835ca6d7a55b3f200718d887cbc7149d37", "installed_by": ["modules"] }, + "tcoffee/alncompare": { + "branch": "master", + "git_sha": "74ee27ccbc2a492a8479323b212b2a42317c3109", + "installed_by": ["modules"] + }, + "tcoffee/irmsd": { + "branch": "master", + "git_sha": "5074bd37c59454497e790d9210e2f7a876f2c24f", + "installed_by": ["modules"] + }, "tcoffee/seqreformat": { "branch": "master", "git_sha": "f759fd45ecabb40c761df1338a4bb3851171a7f7", diff --git a/modules/local/merge_evaluations_report.nf b/modules/local/merge_evaluations_report.nf deleted file mode 100644 index 308f1675..00000000 --- a/modules/local/merge_evaluations_report.nf +++ /dev/null @@ -1,30 +0,0 @@ - - -process MERGE_EVALUATIONS_REPORT { - label 'process_low' - - input: - path(tcoffee_alncompare_scores_summary) - path(tcoffee_irmsd_scores_summary) - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - merge_scores.py \ - "evaluation_summary_report.csv" \ - ${tcoffee_alncompare_scores_summary} \ - ${tcoffee_irmsd_scores_summary} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/modules/local/parse_irmsd.nf b/modules/local/parse_irmsd.nf new file mode 100644 index 00000000..23e7dea9 --- /dev/null +++ b/modules/local/parse_irmsd.nf @@ -0,0 +1,55 @@ +process PARSE_IRMSD { + tag "$meta.id" + label 'process_low' + + conda "conda-forge::python=3.11.0 conda-forge::biopython=1.80 conda-forge::pandas=1.5.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-27978155697a3671f3ef9aead4b5c823a02cc0b7:548df772fe13c0232a7eab1bc1deb98b495a05ab-0' : + 'biocontainers/mulled-v2-27978155697a3671f3ef9aead4b5c823a02cc0b7:548df772fe13c0232a7eab1bc1deb98b495a05ab-0' }" + + input: + tuple val(meta), path(infile) + + output: + tuple val(meta), path("${prefix}.irmsd_tot"), emit: irmsd_tot + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${infile.baseName}" + def header = meta.keySet().join(",") + def values = meta.values().join(",") + """ + # Parse irmsd file + grep "TOTAL" $infile > ${prefix}.total_irmsd + + parsers.py -i ${prefix}.total_irmsd -o ${prefix}.scores.csv + + # Prep metadata file + echo "${header}" > meta.csv + echo "${values}" >> meta.csv + + # Add metadata info to output file + paste -d, meta.csv ${prefix}.scores.csv > ${prefix}.irmsd_tot + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.irmsd_tot + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + END_VERSIONS + """ +} diff --git a/modules/local/tcoffee_alncompare_eval.nf b/modules/local/tcoffee_alncompare_eval.nf deleted file mode 100644 index 1ace4bf1..00000000 --- a/modules/local/tcoffee_alncompare_eval.nf +++ /dev/null @@ -1,64 +0,0 @@ - - -process TCOFFEE_ALNCOMPARE_EVAL { - tag "$meta.id" - label 'process_low' - - conda "bioconda::t-coffee=13.45.0.4846264" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/t-coffee:13.45.0.4846264--hc57179f_5': - 'biocontainers/t-coffee:13.45.0.4846264--hc57179f_5' }" - input: - tuple val(meta), file (msa), file (ref_msa) - - output: - tuple val(meta), path ("*.scores"), emit: scores - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def header = meta.keySet().join(",") - def values = meta.values().join(",") - """ - ## Sum-of-Pairs Score ## - t_coffee -other_pg aln_compare \ - -al1 ${ref_msa} \ - -al2 ${msa} \ - -compare_mode sp \ - | grep -v "seq1" | grep -v '*' | \ - awk '{ print \$4}' ORS="\t" \ - >> "scores.txt" - - ## Total Column Score ## - t_coffee -other_pg aln_compare \ - -al1 ${ref_msa} \ - -al2 ${msa} \ - -compare_mode tc \ - | grep -v "seq1" | grep -v '*' | \ - awk '{ print \$4}' ORS="\t" \ - >> "scores.txt" - - ## Column Score ## - t_coffee -other_pg aln_compare \ - -al1 ${ref_msa} \ - -al2 ${msa} \ - -compare_mode column \ - | grep -v "seq1" | grep -v '*' | \ - awk '{ print \$4}' ORS="\t" \ - >> "scores.txt" - - - # Add metadata info to output file - echo "${header},sp,tc,column" > "${msa.baseName}.scores" - - # Add values - scores=\$(awk '{sub(/[[:space:]]+\$/, "")} 1' scores.txt | tr -s '[:blank:]' ',') - echo "${values},\$scores" >> "${msa.baseName}.scores" - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - t_coffee: \$( t_coffee -version | sed 's/.*(Version_\\(.*\\)).*/\\1/' ) - END_VERSIONS - """ -} diff --git a/modules/local/tcoffee_irmsd_eval.nf b/modules/local/tcoffee_irmsd_eval.nf deleted file mode 100644 index cc7524ef..00000000 --- a/modules/local/tcoffee_irmsd_eval.nf +++ /dev/null @@ -1,47 +0,0 @@ - -process TCOFFEE_IRMSD_EVAL { - tag "$meta.id" - label 'process_low' - - conda "bioconda::t-coffee=13.45.0.4846264" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/t-coffee:13.45.0.4846264--hc57179f_5': - 'biocontainers/t-coffee:13.45.0.4846264--hc57179f_5' }" - - input: - tuple val(meta), file (msa), file (ref_msa), file(structures) - - output: - tuple val(meta), path ("*.total_irmsd.csv"), emit: scores - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def header = meta.keySet().join(",") - def values = meta.values().join(",") - """ - # Prep templates - for i in `awk 'sub(/^>/, "")' ${msa}`; do - id_pdb=`echo \$i | sed 's./._.g'`; echo -e ">"\$i "_P_" "\${id_pdb}" >> template_list.txt - done - - # Comp irmsd - t_coffee -other_pg irmsd $msa -template_file template_list.txt | grep "TOTAL" > ${msa.baseName}.total_irmsd - - # Parse irmsd file - parsers.py -i ${msa.baseName}.total_irmsd -o ${msa.baseName}.scores.csv - - # Prep metadata file - echo "${header}" > meta.csv - echo "${values}" >> meta.csv - - # Add metadata info to output file - paste -d, meta.csv ${msa.baseName}.scores.csv > ${msa.baseName}.total_irmsd.csv - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - t_coffee: \$( t_coffee -version | sed 's/.*(Version_\\(.*\\)).*/\\1/' ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/tcoffee/alncompare/environment.yml b/modules/nf-core/tcoffee/alncompare/environment.yml new file mode 100644 index 00000000..099528ec --- /dev/null +++ b/modules/nf-core/tcoffee/alncompare/environment.yml @@ -0,0 +1,7 @@ +name: "tcoffee_alncompare" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::t-coffee=13.46.0.919e8c6b diff --git a/modules/nf-core/tcoffee/alncompare/main.nf b/modules/nf-core/tcoffee/alncompare/main.nf new file mode 100644 index 00000000..75c008af --- /dev/null +++ b/modules/nf-core/tcoffee/alncompare/main.nf @@ -0,0 +1,60 @@ +process TCOFFEE_ALNCOMPARE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/t-coffee:13.46.0.919e8c6b--hfc96bf3_0': + 'biocontainers/t-coffee:13.46.0.919e8c6b--hfc96bf3_0' }" + + input: + tuple val(meta), path(msa), path(ref_msa) + + output: + tuple val(meta), path("*.scores"), emit: scores + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ? task.ext.prefix : "${msa.baseName}" + def args = task.ext.args ? task.ext.args.contains('compare_mode') ? task.ext.args : (task.ext.args +'-compare_mode tc ' ) : '-compare_mode tc ' + def metric_name = args.split('compare_mode ')[1].split(' ')[0] + def header = meta.keySet().join(",") + def values = meta.values().join(",") + + """ + export TEMP='./' + t_coffee -other_pg aln_compare \ + -al1 ${ref_msa} \ + -al2 ${msa} \ + ${args} \ + | grep -v "seq1" | grep -v '*' | \ + awk '{ print \$4}' ORS="\t" \ + >> "scores.txt" + + # Add metadata info to output file + echo "${header},${metric_name}" > "${prefix}.scores" + + # Add values + scores=\$(awk '{sub(/[[:space:]]+\$/, "")} 1' scores.txt | tr -s '[:blank:]' ',') + echo "${values},\$scores" >> "${prefix}.scores" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + END_VERSIONS + """ + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch "${prefix}.scores" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tcoffee/alncompare/meta.yml b/modules/nf-core/tcoffee/alncompare/meta.yml new file mode 100644 index 00000000..9f852476 --- /dev/null +++ b/modules/nf-core/tcoffee/alncompare/meta.yml @@ -0,0 +1,48 @@ +name: "tcoffee_alncompare" +description: Compares 2 alternative MSAs to evaluate them. +keywords: + - alignment + - MSA + - evaluation +tools: + - "tcoffee": + description: "A collection of tools for Multiple Alignments of DNA, RNA, Protein Sequence" + homepage: "http://www.tcoffee.org/Projects/tcoffee/" + documentation: "https://tcoffee.readthedocs.io/en/latest/tcoffee_main_documentation.html" + tool_dev_url: "https://github.com/cbcrg/tcoffee" + doi: "10.1006/jmbi.2000.4042" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', ... ] + - msa: + type: file + description: fasta file containing the alignment to be evaluated + pattern: "*.{aln,fa,fasta,fas}" + - ref_msa: + type: file + description: fasta file containing the reference alignment used for the evaluation + pattern: "*.{aln,fa,fasta,fas}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - scores: + type: file + description: a file containing the score of the alignment + pattern: "*.scores" + +authors: + - "@l-mansouri" + - "@luisas" diff --git a/modules/nf-core/tcoffee/alncompare/tests/main.nf.test b/modules/nf-core/tcoffee/alncompare/tests/main.nf.test new file mode 100644 index 00000000..225a4f12 --- /dev/null +++ b/modules/nf-core/tcoffee/alncompare/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process TCOFFEE_ALNCOMPARE" + script "../main.nf" + process "TCOFFEE_ALNCOMPARE" + tag "modules" + tag "modules_nfcore" + tag "tcoffee" + tag "tcoffee/alncompare" + + test("seatoxin") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) + ] + """ + } + + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/nf-core/tcoffee/alncompare/tests/main.nf.test.snap b/modules/nf-core/tcoffee/alncompare/tests/main.nf.test.snap new file mode 100644 index 00000000..7524515a --- /dev/null +++ b/modules/nf-core/tcoffee/alncompare/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "seatoxin": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "setoxin-ref.scores:md5,c77aceec520beb56f08c342e01c56a14" + ] + ], + "1": [ + "versions.yml:md5,8a30677771f2dc3b61b60702622fdfda" + ], + "scores": [ + [ + { + "id": "test" + }, + "setoxin-ref.scores:md5,c77aceec520beb56f08c342e01c56a14" + ] + ], + "versions": [ + "versions.yml:md5,8a30677771f2dc3b61b60702622fdfda" + ] + } + ], + "timestamp": "2023-12-13T10:50:50.701336" + } +} \ No newline at end of file diff --git a/modules/nf-core/tcoffee/alncompare/tests/nextflow.config b/modules/nf-core/tcoffee/alncompare/tests/nextflow.config new file mode 100644 index 00000000..004820cf --- /dev/null +++ b/modules/nf-core/tcoffee/alncompare/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = "-compare_mode tc" +} \ No newline at end of file diff --git a/modules/nf-core/tcoffee/alncompare/tests/tags.yml b/modules/nf-core/tcoffee/alncompare/tests/tags.yml new file mode 100644 index 00000000..1dd179bc --- /dev/null +++ b/modules/nf-core/tcoffee/alncompare/tests/tags.yml @@ -0,0 +1,2 @@ +tcoffee/alncompare: + - "modules/nf-core/tcoffee/alncompare/**" diff --git a/modules/nf-core/tcoffee/irmsd/environment.yml b/modules/nf-core/tcoffee/irmsd/environment.yml new file mode 100644 index 00000000..635de779 --- /dev/null +++ b/modules/nf-core/tcoffee/irmsd/environment.yml @@ -0,0 +1,7 @@ +name: "tcoffee_irmsd" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::t-coffee=13.46.0.919e8c6b diff --git a/modules/nf-core/tcoffee/irmsd/main.nf b/modules/nf-core/tcoffee/irmsd/main.nf new file mode 100644 index 00000000..0c60312e --- /dev/null +++ b/modules/nf-core/tcoffee/irmsd/main.nf @@ -0,0 +1,49 @@ +process TCOFFEE_IRMSD { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/t-coffee:13.46.0.919e8c6b--hfc96bf3_0': + 'biocontainers/t-coffee:13.46.0.919e8c6b--hfc96bf3_0' }" + + input: + tuple val(meta), file (msa) + tuple val(meta2), file(template), file(structures) + + output: + tuple val(meta), path ("${prefix}.irmsd"), emit: irmsd + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${msa.baseName}" + """ + export TEMP='./' + + t_coffee -other_pg irmsd \ + $msa \ + $args \ + -template_file $template > ${prefix}.irmsd + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${msa.baseName}" + """ + touch ${prefix}.irmsd + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tcoffee/irmsd/meta.yml b/modules/nf-core/tcoffee/irmsd/meta.yml new file mode 100644 index 00000000..abee1a3a --- /dev/null +++ b/modules/nf-core/tcoffee/irmsd/meta.yml @@ -0,0 +1,51 @@ +name: "tcoffee_irmsd" +description: Computes irmsd score for a given alignment and the structures. +keywords: + - alignment + - MSA + - evaluation +tools: + - "tcoffee": + description: "A collection of tools for Multiple Alignments of DNA, RNA, Protein Sequence" + homepage: "http://www.tcoffee.org/Projects/tcoffee/" + documentation: "https://tcoffee.readthedocs.io/en/latest/tcoffee_main_documentation.html" + tool_dev_url: "https://github.com/cbcrg/tcoffee" + doi: "10.1006/jmbi.2000.4042" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', ... ] + - msa: + type: file + description: fasta file containing the alignment to be evaluated + pattern: "*.{aln,fa,fasta,fas}" + - template: + type: file + description: Template file matching the structures to the sequences in the alignment + pattern: "*" + - structures: + type: directory + description: Directory containing the structures file matching the sequences in the alignment in PDB format + pattern: "*" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - irmsd: + type: file + description: File containing the irmsd of the alignment + pattern: "*" + +authors: + - "@luisas" diff --git a/modules/nf-core/tcoffee/irmsd/tests/main.nf.test b/modules/nf-core/tcoffee/irmsd/tests/main.nf.test new file mode 100644 index 00000000..1519a66d --- /dev/null +++ b/modules/nf-core/tcoffee/irmsd/tests/main.nf.test @@ -0,0 +1,50 @@ +nextflow_process { + + name "Test Process TCOFFEE_IRMSD" + script "../main.nf" + process "TCOFFEE_IRMSD" + tag "modules" + tag "modules_nfcore" + tag "tcoffee" + tag "tcoffee/irmsd" + + test("seatoxin") { + + setup { + + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ [ id:'test' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz", checkIfExists: true) + ] + + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) + ] + input[1] = UNTAR.out.untar.map { meta,dir -> [[ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/templates/seatoxin-ref_template.txt", checkIfExists: true) ,file(dir).listFiles().collect()]} + """ + } + + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.irmsd.get(0).get(1)).getText().contains("1ahl") } + ) + } + + } + +} diff --git a/modules/nf-core/tcoffee/irmsd/tests/main.nf.test.snap b/modules/nf-core/tcoffee/irmsd/tests/main.nf.test.snap new file mode 100644 index 00000000..c036642f --- /dev/null +++ b/modules/nf-core/tcoffee/irmsd/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "seatoxin": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "setoxin.irmsd:md5,a8f49fb2621cdc9fe39690a813ad0ca5" + ] + ], + "1": [ + "versions.yml:md5,60646e38ef71127e3736a06c91c2983f" + ], + "irmsd": [ + [ + { + "id": "test" + }, + "setoxin.irmsd:md5,a8f49fb2621cdc9fe39690a813ad0ca5" + ] + ], + "versions": [ + "versions.yml:md5,60646e38ef71127e3736a06c91c2983f" + ] + } + ], + "timestamp": "2023-12-13T12:26:46.827121" + } +} \ No newline at end of file diff --git a/modules/nf-core/tcoffee/irmsd/tests/tags.yml b/modules/nf-core/tcoffee/irmsd/tests/tags.yml new file mode 100644 index 00000000..637c3c57 --- /dev/null +++ b/modules/nf-core/tcoffee/irmsd/tests/tags.yml @@ -0,0 +1,2 @@ +tcoffee/irmsd: + - "modules/nf-core/tcoffee/irmsd/**" diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 2fcaeaa7..c8a8238e 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -13,7 +13,6 @@ include { MUSCLE5_SUPER5 } from '../../modules/nf-core/musc // Include local modules include { CREATE_TCOFFEETEMPLATE } from '../../modules/local/create_tcoffee_template' -include { MTMALIGN_ALIGN } from '../../modules/local/mtmalign_align' workflow ALIGN { take: @@ -59,7 +58,6 @@ workflow ALIGN { mafft: it[0]["aligner"] == "MAFFT" kalign: it[0]["aligner"] == "KALIGN" learnmsa: it[0]["aligner"] == "LEARNMSA" - mtmalign: it[0]["aligner"] == "MTMALIGN" muscle5: it[0]["aligner"] == "MUSCLE5" } .set { ch_fasta_trees } @@ -133,9 +131,9 @@ workflow ALIGN { // ----------------- 3DCOFFEE ------------------ ch_fasta_trees_3dcoffee = ch_fasta_trees.tcoffee3d.map{ meta, fasta, tree -> [meta["id"], meta, fasta, tree] } - .combine(ch_structures.map{ meta, structures, template -> [meta["id"], structures, template]}, by: 0) + .combine(ch_structures.map{ meta, template, structures -> [meta["id"], template, structures]}, by: 0) .multiMap{ - merging_id, meta, fastafile, treefile, structuresfiles, templatefile -> + merging_id, meta, fastafile, treefile, templatefile, structuresfiles -> fasta: [ meta, fastafile ] tree: [ meta, treefile ] structures: [ meta, templatefile, structuresfiles ] diff --git a/subworkflows/local/evaluate.nf b/subworkflows/local/evaluate.nf index c52b697e..7c4dd9cb 100644 --- a/subworkflows/local/evaluate.nf +++ b/subworkflows/local/evaluate.nf @@ -1,8 +1,13 @@ -include { TCOFFEE_ALNCOMPARE_EVAL } from '../../modules/local/tcoffee_alncompare_eval.nf' -include { TCOFFEE_IRMSD_EVAL } from '../../modules/local/tcoffee_irmsd_eval.nf' -include { MERGE_EVALUATIONS_REPORT } from '../../modules/local/merge_evaluations_report.nf' +include { TCOFFEE_ALNCOMPARE as TCOFFEE_ALNCOMPARE_SP } from '../../modules/nf-core/tcoffee/alncompare' +include { TCOFFEE_ALNCOMPARE as TCOFFEE_ALNCOMPARE_TC } from '../../modules/nf-core/tcoffee/alncompare' +include { TCOFFEE_IRMSD } from '../../modules/nf-core/tcoffee/irmsd' +include { CSVTK_CONCAT as CONCAT_SP } from '../../modules/nf-core/csvtk/concat/main.nf' +include { CSVTK_CONCAT as CONCAT_TC } from '../../modules/nf-core/csvtk/concat/main.nf' +include { CSVTK_CONCAT as CONCAT_IRMSD } from '../../modules/nf-core/csvtk/concat/main.nf' +include { CSVTK_JOIN as MERGE_EVAL } from '../../modules/nf-core/csvtk/join/main.nf' +include { PARSE_IRMSD } from '../../modules/local/parse_irmsd.nf' workflow EVALUATE { @@ -16,53 +21,77 @@ workflow EVALUATE { ch_versions = Channel.empty() - // + // ------------------------------------------- // Reference based evaluation - // + // ------------------------------------------- alignment_and_ref = ch_references.map { meta,ref -> [ meta.id, ref ] } .cross (ch_msa.map { meta, aln -> [ meta.id, meta, aln ] }) - .map { chref, chaln -> [ chaln[1], chref[1], chaln[2] ] } + .map { chref, chaln -> [ chaln[1], chaln[2], chref[1] ] } - TCOFFEE_ALNCOMPARE_EVAL(alignment_and_ref) - tcoffee_alncompare_scores = TCOFFEE_ALNCOMPARE_EVAL.out.scores - ch_versions = ch_versions.mix(TCOFFEE_ALNCOMPARE_EVAL.out.versions.first()) + TCOFFEE_ALNCOMPARE_SP(alignment_and_ref) + sp_scores = TCOFFEE_ALNCOMPARE_SP.out.scores + ch_versions = ch_versions.mix(TCOFFEE_ALNCOMPARE_SP.out.versions.first()) - // + ch_sp_summary = sp_scores.map{ + meta, csv -> csv + }.collect().map{ + csv -> [ [id_simstats:"summary_sp"], csv] + } + CONCAT_SP(ch_sp_summary, "csv", "csv") + + + TCOFFEE_ALNCOMPARE_TC(alignment_and_ref) + tc_scores = TCOFFEE_ALNCOMPARE_TC.out.scores + ch_versions = ch_versions.mix(TCOFFEE_ALNCOMPARE_TC.out.versions.first()) + + ch_tc_summary = tc_scores.map{ + meta, csv -> csv + }.collect().map{ + csv -> [ [id_simstats:"summary_tc"], csv] + } + CONCAT_TC(ch_tc_summary, "csv", "csv") + + + + // ------------------------------------------- // Structure based evaluation - // - alignment_and_ref_and_structures = alignment_and_ref - .map { it -> [ it[0]["id"], it[0], it[1], it[2] ] } - .combine(ch_structures.map { it -> [ it[0]["id"], it[1] ] }, by: 0) - .map { it -> [ it[1], it[2], it[3], it[4] ] } - - TCOFFEE_IRMSD_EVAL(alignment_and_ref_and_structures) - tcoffee_irmsd_scores = TCOFFEE_IRMSD_EVAL.out.scores - ch_versions = ch_versions.mix(TCOFFEE_IRMSD_EVAL.out.versions.first()) + // ------------------------------------------- + msa_str = ch_structures.map { meta, template, str -> [ meta.id, template, str ] } + .cross (ch_msa.map { meta, aln -> [ meta.id, meta, aln ] }) + .multiMap { chstr, chaln -> + msa: [ chaln[1], chaln[2] ] + structures: [ chstr[0], chstr[1], chstr[2] ] + } + + + TCOFFEE_IRMSD(msa_str.msa, msa_str.structures) + tcoffee_irmsd_scores = TCOFFEE_IRMSD.out.irmsd + ch_versions = ch_versions.mix(TCOFFEE_IRMSD.out.versions.first()) + tcoffee_irmsd_scores_tot = PARSE_IRMSD(tcoffee_irmsd_scores) + + ch_irmsd_summary = tcoffee_irmsd_scores_tot.map{ + meta, csv -> csv + }.collect().map{ + csv -> [ [id_simstats:"summary_irmsd"], csv] + } + CONCAT_IRMSD(ch_irmsd_summary, "csv", "csv") - // - // Summarize evaluation summaries into one summary file - // - tcoffee_alncompare_scores_summary = tcoffee_alncompare_scores.map{ it -> "${it[1].text}" } - .collectFile( name: 'tcoffee_alncompare_scores_summary.csv', - keepHeader : true, - skip:1, - newLine: false) - tcoffee_irmsd_scores_summary = tcoffee_irmsd_scores.map{ it -> "${it[1].text}" } - .collectFile( name: 'tcoffee_irmsd_scores_summary.csv', - keepHeader : true, - skip:1, - newLine: false) + // ------------------------------------------- + // MERGE ALL STATS + // ------------------------------------------- - MERGE_EVALUATIONS_REPORT( tcoffee_alncompare_scores_summary, - tcoffee_irmsd_scores_summary ) - ch_versions = ch_versions.mix(MERGE_EVALUATIONS_REPORT.out.versions.first()) + csv_sp = CONCAT_SP.out.csv.map{ meta, csv -> csv } + csv_tc = CONCAT_TC.out.csv.map{ meta, csv -> csv } + csv_irmsd = CONCAT_IRMSD.out.csv.map{ meta, csv -> csv } + csvs_stats = csv_sp.mix(csv_tc).mix(csv_irmsd).collect().map{ csvs -> [[id:"summary_eval"], csvs] } + MERGE_EVAL(csvs_stats) + ch_versions = ch_versions.mix(MERGE_EVAL.out.versions) emit: - tcoffee_alncompare_scores - tcoffee_irmsd_scores + stats_summary = MERGE_EVAL.out.csv versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] } \ No newline at end of file diff --git a/subworkflows/local/stats.nf b/subworkflows/local/stats.nf index 3eb12a1e..de043d97 100644 --- a/subworkflows/local/stats.nf +++ b/subworkflows/local/stats.nf @@ -16,7 +16,6 @@ workflow STATS { main: - ch_seqs.view() ch_versions = Channel.empty() // // ------------------------------------------- @@ -60,7 +59,6 @@ workflow STATS { csv_seqstats = CONCAT_SEQSTATS.out.csv.map{ meta, csv -> csv } csvs_stats = csv_sim.mix(csv_seqstats).collect().map{ csvs -> [[id:"summary_stats"], csvs] } - csvs_stats.view() MERGE_STATS(csvs_stats) stats_summary = MERGE_STATS.out.csv ch_versions = ch_versions.mix(MERGE_STATS.out.versions) diff --git a/workflows/multiplesequencealign.nf b/workflows/multiplesequencealign.nf index d96af562..7c2f2e96 100644 --- a/workflows/multiplesequencealign.nf +++ b/workflows/multiplesequencealign.nf @@ -139,7 +139,7 @@ workflow MULTIPLESEQUENCEALIGN { ch_templates_merged = forced_templates.mix( new_templates) // Merge the structures and templates channels, ready for the alignment - ch_structures_template = ch_structures.combine(ch_templates_merged, by:0) + ch_structures_template = ch_templates_merged.combine(ch_structures, by:0) // Compute summary statistics about the input sequences // @@ -160,7 +160,7 @@ workflow MULTIPLESEQUENCEALIGN { // Evaluate the quality of the alignment // if( !params.skip_eval ){ - EVALUATE(ALIGN.out.msa, ch_refs, ch_structures) + EVALUATE(ALIGN.out.msa, ch_refs, ch_structures_template) ch_versions = ch_versions.mix(EVALUATE.out.versions) }