From e87572cea26295eb256f60232c540dd765844600 Mon Sep 17 00:00:00 2001
From: luisas <luisa.santus95@gmail.com>
Date: Wed, 23 Oct 2024 18:19:11 +0200
Subject: [PATCH 01/28] first commit

---
 .github/workflows/ci.yml                      |  1 +
 assets/samplesheet.csv                        |  6 +--
 assets/schema_input.json                      | 11 +++++-
 conf/modules_colabfold.config                 | 16 ++++++--
 conf/test_split_fasta.config                  | 39 +++++++++++++++++++
 docs/usage.md                                 |  2 +
 nextflow.config                               |  2 +
 nextflow_schema.json                          |  5 +++
 .../utils_nfcore_proteinfold_pipeline/main.nf | 12 ++++++
 9 files changed, 86 insertions(+), 8 deletions(-)
 create mode 100644 conf/test_split_fasta.config

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 161ca5e8..196a9393 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -43,6 +43,7 @@ jobs:
           - "test_colabfold_webserver"
           - "test_colabfold_download"
           - "test_esmfold"
+          - "test_split_fasta"
         isMaster:
           - ${{ github.base_ref == 'master' }}
         # Exclude conda and singularity on dev
diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv
index 467fdcf0..5e7df047 100644
--- a/assets/samplesheet.csv
+++ b/assets/samplesheet.csv
@@ -1,3 +1,3 @@
-sequence,fasta
-T1024,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1024.fasta
-T1026,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1026.fasta
+sequence,fasta,reference,dependencies
+seatoxin-ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/af2_structures/seatoxin-ref.tar.gz
+toxin-ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/toxin-ref.fa,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/toxin.ref,
\ No newline at end of file
diff --git a/assets/schema_input.json b/assets/schema_input.json
index c261ae58..2bbdf919 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -13,6 +13,12 @@
                 "errorMessage": "Sequence name must be provided and cannot contain spaces",
                 "meta": ["id"]
             },
+            "id": {
+                "type": "string",
+                "pattern": "^\\S+$",
+                "errorMessage": "Sequence name must be provided and cannot contain spaces",
+                "meta": ["id"]
+            },
             "fasta": {
                 "type": "string",
                 "format": "file-path",
@@ -21,6 +27,9 @@
                 "errorMessage": "Fasta file must be provided, cannot contain spaces and must have extension '.fa' or '.fasta'"
             }
         },
-        "required": ["sequence", "fasta"]
+        "oneOf": [
+            { "required": ["sequence", "fasta"] },
+            { "required": ["id", "fasta"] }
+        ]
     }
 }
diff --git a/conf/modules_colabfold.config b/conf/modules_colabfold.config
index 2efcfa01..00da59e7 100644
--- a/conf/modules_colabfold.config
+++ b/conf/modules_colabfold.config
@@ -67,10 +67,18 @@ if (params.colabfold_server == 'local') {
                 params.use_templates ? '--templates' : ''
             ].join(' ').trim()
             publishDir = [
-                path: { "${params.outdir}/colabfold/${params.colabfold_server}" },
-                mode: 'copy',
-                saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
-                pattern: '*.*'
+                [
+                    path: { "${params.outdir}/colabfold/${params.colabfold_server}/complete_results" },
+                    mode: 'copy',
+                    saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+                    pattern: '*.*'
+                ],
+                [
+                    path: { "${params.outdir}/colabfold/${params.colabfold_server}" },
+                    mode: 'copy',
+                    saveAs: { "${meta.id}.pdb" },
+                    pattern: '*_relaxed_rank_01.pdb'
+                ],
             ]
         }
     }
diff --git a/conf/test_split_fasta.config b/conf/test_split_fasta.config
new file mode 100644
index 00000000..c3feb113
--- /dev/null
+++ b/conf/test_split_fasta.config
@@ -0,0 +1,39 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+    Use as follows:
+        nextflow run nf-core/proteinfold -profile test_split_fasta,<docker/singularity> --outdir <OUTDIR>
+----------------------------------------------------------------------------------------
+*/
+
+stubRun = true
+
+// Limit resources so that this can run on GitHub Actions
+process {
+    resourceLimits = [
+        cpus: 4,
+        memory: '15.GB',
+        time: '1.h'
+    ]
+}
+
+params {
+    config_profile_name        = 'Test profile'
+    config_profile_description = 'Minimal test dataset to check pipeline function'
+
+    // Input data to test colabfold with the colabfold webserver analysis
+    mode             = 'colabfold'
+    colabfold_server = 'local'
+    split_fasta      = true
+    colabfold_db     = "${projectDir}/assets/dummy_db_dir"
+    //input          = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet_multimer.csv'
+    input            = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.0/samplesheet_test_af2.csv'
+}
+
+process {
+    withName: 'MMSEQS_COLABFOLDSEARCH|COLABFOLD_BATCH' {
+        container = 'biocontainers/gawk:5.1.0'
+    }
+}
diff --git a/docs/usage.md b/docs/usage.md
index 43b0d86a..d4502203 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -35,6 +35,8 @@ The samplesheet can have as many columns as you desire, however, there is a stri
 
 An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
 
+Each FASTA file is assumed to contain a single protein sequence unless you are using multimer mode. To fold several sequences individually from one or more multi-sequence FASTA files, list those files in the samplesheet and set the `--split_fasta` parameter. Each sequence in each FASTA file will then be folded individually and in parallel, as if you had listed each sequence separately in the samplesheet.
+
 ## Running the pipeline
 
 The typical commands for running the pipeline on AlphaFold2, Colabfold and ESMFold modes are shown below.
diff --git a/nextflow.config b/nextflow.config
index d8fc2623..ed874e2c 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -13,6 +13,7 @@ params {
     input                       = null
     mode                        = 'alphafold2' // {alphafold2, colabfold, esmfold}
     use_gpu                     = false
+    split_fasta                 = false
 
     // Alphafold2 parameters
     alphafold2_mode             = "standard"
@@ -240,6 +241,7 @@ profiles {
     test_colabfold_webserver      { includeConfig 'conf/test_colabfold_webserver.config'               }
     test_colabfold_download       { includeConfig 'conf/test_colabfold_download.config'                }
     test_esmfold                  { includeConfig 'conf/test_esmfold.config'                           }
+    test_split_fasta              { includeConfig 'conf/test_split_fasta.config'                       }
     test_full                     { includeConfig 'conf/test_full.config'                              }
     test_full_alphafold2_standard { includeConfig 'conf/test_full.config'                              }
     test_full_alphafold2_split    { includeConfig 'conf/test_full_alphafold_split.config'              }
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 313997a8..8df979ce 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -40,6 +40,11 @@
                     "description": "Run on CPUs (default) or GPUs",
                     "fa_icon": "fas fa-microchip"
                 },
+                "split_fasta": {
+                    "type": "boolean",
+                    "description": "Split each input FASTA file into multiple FASTA files, each containing a single sequence to be folded",
+                    "fa_icon": "fas fa-microchip"
+                },
                 "email": {
                     "type": "string",
                     "description": "Email address for completion summary.",
diff --git a/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf
index fa0545a6..9c3ebe1c 100644
--- a/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf
@@ -67,6 +67,18 @@ workflow PIPELINE_INITIALISATION {
     //
     ch_samplesheet = Channel.fromList(samplesheetToList(params.input, "assets/schema_input.json"))
 
+    if (params.split_fasta) {
+
+        ch_samplesheet.splitFasta(record: [id:true])
+                    .map{ record -> record.id.toString() }
+                    .set{ ID }.view()
+        ch_samplesheet = ch_samplesheet.map{meta, fasta -> fasta}
+                                    .splitFasta( by:1, file: true )
+                                    .map{fasta -> [[id:record.id], fasta ]}.view()
+    }
+
+    ch_samplesheet.view()
+
     emit:
     samplesheet = ch_samplesheet
     versions    = ch_versions

From d78bf35f140e51ad575f2319e56212ecab656eec Mon Sep 17 00:00:00 2001
From: luisas <luisa.santus95@gmail.com>
Date: Fri, 25 Oct 2024 16:04:39 +0200
Subject: [PATCH 02/28] update

---
 assets/samplesheet.csv                        |  3 --
 assets/schema_input.json                      | 11 +------
 conf/modules.config                           |  8 +++++
 conf/modules_alphafold2.config                | 30 +++++++++++++++----
 conf/modules_colabfold.config                 | 20 +++++++++----
 conf/modules_esmfold.config                   | 11 +++++--
 conf/test_split_fasta.config                  |  6 ++--
 docs/output.md                                | 14 +++++----
 main.nf                                       |  6 +++-
 .../utils_nfcore_proteinfold_pipeline/main.nf | 27 +++++++++++------
 10 files changed, 90 insertions(+), 46 deletions(-)
 delete mode 100644 assets/samplesheet.csv

diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv
deleted file mode 100644
index 5e7df047..00000000
--- a/assets/samplesheet.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-sequence,fasta,reference,dependencies
-seatoxin-ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/af2_structures/seatoxin-ref.tar.gz
-toxin-ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/toxin-ref.fa,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/toxin.ref,
\ No newline at end of file
diff --git a/assets/schema_input.json b/assets/schema_input.json
index 2bbdf919..c261ae58 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -13,12 +13,6 @@
                 "errorMessage": "Sequence name must be provided and cannot contain spaces",
                 "meta": ["id"]
             },
-            "id": {
-                "type": "string",
-                "pattern": "^\\S+$",
-                "errorMessage": "Sequence name must be provided and cannot contain spaces",
-                "meta": ["id"]
-            },
             "fasta": {
                 "type": "string",
                 "format": "file-path",
@@ -27,9 +21,6 @@
                 "errorMessage": "Fasta file must be provided, cannot contain spaces and must have extension '.fa' or '.fasta'"
             }
         },
-        "oneOf": [
-            { "required": ["sequence", "fasta"] },
-            { "required": ["id", "fasta"] }
-        ]
+        "required": ["sequence", "fasta"]
     }
 }
diff --git a/conf/modules.config b/conf/modules.config
index c56b11eb..5f6fbd9f 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -50,6 +50,14 @@ process {
         ]
     }
 
+    withName: 'GENERATE_REPORT'{
+        publishDir = [
+            path: { "${params.outdir}/report" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
     withName: 'FOLDSEEK_EASYSEARCH' {
         ext.args   = { params.foldseek_easysearch_arg ? "$params.foldseek_easysearch_arg" : "--format-mode 3" }
         publishDir = [
diff --git a/conf/modules_alphafold2.config b/conf/modules_alphafold2.config
index 33b04c38..c8b4fab3 100644
--- a/conf/modules_alphafold2.config
+++ b/conf/modules_alphafold2.config
@@ -40,9 +40,18 @@ if (params.alphafold2_mode == 'standard') {
                 params.max_template_date ? "--max_template_date ${params.max_template_date}" : ''
             ].join(' ').trim()
             publishDir = [
-                path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}" },
-                mode: 'copy',
-                saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+                [   
+                    path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}/complete_results" },
+                    mode: 'copy',
+                    saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+                    pattern: '*.*'
+                ],
+                [
+                    path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}" },
+                    mode: 'copy',
+                    saveAs: { "${meta.id}.pdb" },
+                    pattern: '*.1.alphafold.pdb'
+                ]
             ]
         }
     }
@@ -64,9 +73,18 @@ if (params.alphafold2_mode == 'split_msa_prediction') {
             if(params.use_gpu) { accelerator = 1 }
             ext.args   = params.use_gpu ? '--use_gpu_relax=true' : '--use_gpu_relax=false'
             publishDir = [
-                path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}" },
-                mode: 'copy',
-                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+                [   
+                    path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}/complete_results" },
+                    mode: 'copy',
+                    saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+                    pattern: '*.*'
+                ],
+                [
+                    path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}" },
+                    mode: 'copy',
+                    saveAs: { "${meta.id}.pdb" },
+                    pattern: 'ranked_0.pdb'
+                ]
             ]
         }
     }
diff --git a/conf/modules_colabfold.config b/conf/modules_colabfold.config
index 00da59e7..ecf87d75 100644
--- a/conf/modules_colabfold.config
+++ b/conf/modules_colabfold.config
@@ -30,10 +30,18 @@ if (params.colabfold_server == 'webserver') {
                 params.host_url ? "--host-url ${params.host_url}" : ''
             ].join(' ').trim()
             publishDir = [
-                path: { "${params.outdir}/colabfold/${params.colabfold_server}" },
-                mode: 'copy',
-                saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
-                pattern: '*.*'
+                [
+                    path: { "${params.outdir}/colabfold_${params.colabfold_server}/complete_results" },
+                    mode: 'copy',
+                    saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+                    pattern: '*.*'
+                ],
+                [
+                    path: { "${params.outdir}/colabfold_${params.colabfold_server}" },
+                    mode: 'copy',
+                    saveAs: { "${meta.id}.pdb" },
+                    pattern: '*_relaxed_rank_01.pdb'
+                ]
             ]
         }
     }
@@ -68,13 +76,13 @@ if (params.colabfold_server == 'local') {
             ].join(' ').trim()
             publishDir = [
                 [
-                    path: { "${params.outdir}/colabfold/${params.colabfold_server}/complete_results" },
+                    path: { "${params.outdir}/colabfold_${params.colabfold_server}/complete_results" },
                     mode: 'copy',
                     saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
                     pattern: '*.*'
                 ],
                 [
-                    path: { "${params.outdir}/colabfold/${params.colabfold_server}" },
+                    path: { "${params.outdir}/colabfold_${params.colabfold_server}" },
                     mode: 'copy',
                     saveAs: { "${meta.id}.pdb" },
                     pattern: '*_relaxed_rank_01.pdb'
diff --git a/conf/modules_esmfold.config b/conf/modules_esmfold.config
index d8356924..ba523450 100644
--- a/conf/modules_esmfold.config
+++ b/conf/modules_esmfold.config
@@ -14,11 +14,18 @@ process {
     withName: 'RUN_ESMFOLD' {
         ext.args = {params.use_gpu ? '' : '--cpu-only'}
         publishDir = [
-                path: { "${params.outdir}/esmfold" },
+            [
+                path: { "${params.outdir}/esmfold/complete_results" },
                 mode: 'copy',
                 saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
-                pattern: '*.*'
+                pattern: '*.tsv'
+            ],
+            [
+                path: { "${params.outdir}/esmfold" },
+                mode: 'copy',
+                pattern: '*.pdb'
             ]
+        ]
     }
 
     withName: 'NFCORE_PROTEINFOLD:ESMFOLD:MULTIQC' {
diff --git a/conf/test_split_fasta.config b/conf/test_split_fasta.config
index c3feb113..44130987 100644
--- a/conf/test_split_fasta.config
+++ b/conf/test_split_fasta.config
@@ -24,12 +24,12 @@ params {
     config_profile_description = 'Minimal test dataset to check pipeline function'
 
     // Input data to test colabfold with the colabfold webserver analysis
-    mode             = 'colabfold'
+    mode             = 'colabold'
     colabfold_server = 'local'
     split_fasta      = true
     colabfold_db     = "${projectDir}/assets/dummy_db_dir"
-    //input          = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet_multimer.csv'
-    input            = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.0/samplesheet_test_af2.csv'
+    input          = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet_multimer.csv'
+    //input            = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.0/samplesheet_test_af2.csv'
 }
 
 process {
diff --git a/docs/output.md b/docs/output.md
index 9b9a8fb8..291614a5 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -23,10 +23,9 @@ The directories listed below will be created in the output directory after the p
 <details markdown="1">
 <summary>Output files</summary>
 
-- `AlphaFold2/`
-  - `<SEQUENCE NAME>/` that contains the computed MSAs, unrelaxed structures, relaxed structures, ranked structures, raw model outputs, prediction metadata, and section timings
-  - `<SEQUENCE NAME>.alphafold.pdb` that is the structure with the highest pLDDT score (ranked first)
-  - `<SEQUENCE NAME>_plddt_mqc.tsv` that presents the pLDDT scores per residue for each of the 5 predicted models
+- `alphafold_standard/` or `alphafold_split_msa_prediction/` based on the selected mode.
+  - `complete_results/` that contains the computed MSAs, unrelaxed structures, relaxed structures, ranked structures, raw model outputs, prediction metadata, and section timings. Specifically, `<SEQUENCE NAME>_plddt_mqc.tsv` presents the pLDDT scores per residue for each of the 5 predicted models.
+  - `<SEQUENCE NAME>.pdb` that is the structure with the highest pLDDT score (ranked first)
 - `DBs/` that contains symbolic links to the downloaded database and parameter files
 
 </details>
@@ -91,7 +90,9 @@ Below you can find an indicative example of the TSV file with the pLDDT scores p
 <details markdown="1">
 <summary>Output files</summary>
 
-- `colabfold/webserver/` or `colabfold/local/` based on the selected mode that contains the computed MSAs, unrelaxed structures, relaxed structures, ranked structures, raw model outputs and scores, prediction metadata, logs and section timings
+- `colabfold_webserver/` or `colabfold_local/` based on the selected mode.
+  - `complete_results/` that contains the computed MSAs, unrelaxed structures, relaxed structures, ranked structures, raw model outputs, prediction metadata, and section timings. Specifically, `<SEQUENCE NAME>_plddt_mqc.tsv` presents the pLDDT scores per residue for each of the 5 predicted models.
+  - `<SEQUENCE NAME>.pdb` that is the structure with the highest pLDDT score (ranked first)
 - `DBs/` that contains symbolic links to the downloaded database and parameter files
 
 </details>
@@ -117,7 +118,8 @@ Below you can find some indicative examples of the output images produced by Col
 
 - `esmfold/`
   - `<SEQUENCE NAME>.pdb` that is the structure with the highest pLDDT score (ranked first)
-  - `<SEQUENCE NAME>_plddt_mqc.tsv` that presents the pLDDT scores per residue for each of the 5 predicted models
+  - `complete_results`
+    - `<SEQUENCE NAME>_plddt_mqc.tsv` that presents the pLDDT scores per residue.
 - `DBs/` that contains symbolic links to the downloaded database and parameter files
 
 </details>
diff --git a/main.nf b/main.nf
index eaf0eac1..34c1f7e0 100644
--- a/main.nf
+++ b/main.nf
@@ -64,6 +64,7 @@ workflow NFCORE_PROTEINFOLD {
     ch_multiqc      = Channel.empty()
     ch_versions     = Channel.empty()
     ch_report_input = Channel.empty()
+    ch_outputsheet  = Channel.empty()
 
     //
     // WORKFLOW: Run alphafold2
@@ -146,7 +147,6 @@ workflow NFCORE_PROTEINFOLD {
             params.create_colabfold_index
         )
         ch_versions = ch_versions.mix(PREPARE_COLABFOLD_DBS.out.versions)
-
         //
         // WORKFLOW: Run nf-core/colabfold workflow
         //
@@ -159,6 +159,7 @@ workflow NFCORE_PROTEINFOLD {
             PREPARE_COLABFOLD_DBS.out.uniref30,
             params.num_recycles_colabfold
         )
+
         ch_multiqc  = COLABFOLD.out.multiqc_report
         ch_versions = ch_versions.mix(COLABFOLD.out.versions)
         ch_report_input = ch_report_input.mix(
@@ -168,6 +169,8 @@ workflow NFCORE_PROTEINFOLD {
                 .join(COLABFOLD.out.msa)
                 .map { it[0]["model"] = "colabfold"; it }
         )
+        // ch_outputsheet = ch_report_input.transpose(by:1).filter{it[1].name.contains("rank_01")}
+        // ch_outputsheet.view()
     }
 
     //
@@ -231,6 +234,7 @@ workflow NFCORE_PROTEINFOLD {
         )
     }
 
+
     emit:
     multiqc_report = ch_multiqc
 }
diff --git a/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf
index 9c3ebe1c..c3642f50 100644
--- a/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf
@@ -68,17 +68,26 @@ workflow PIPELINE_INITIALISATION {
     ch_samplesheet = Channel.fromList(samplesheetToList(params.input, "assets/schema_input.json"))
 
     if (params.split_fasta) {
-
-        ch_samplesheet.splitFasta(record: [id:true])
-                    .map{ record -> record.id.toString() }
-                    .set{ ID }.view()
-        ch_samplesheet = ch_samplesheet.map{meta, fasta -> fasta}
-                                    .splitFasta( by:1, file: true )
-                                    .map{fasta -> [[id:record.id], fasta ]}.view()
+        // Extract all sequence headers from the fasta file
+        // to keep track of which sequences belong to which dataset
+        // and create a new channel [[id:{dataset_id}, sequence:{sequence_id}]]
+        ch_samplesheet.splitFasta(by:1, record: [header:true])
+                      .map{meta, record -> [record.header, meta]}
+                      .set{dataset_sequence_mapping}
+
+        // Split the fasta file into individual files for each sequence
+        ch_samplesheet.map{ meta,fasta -> fasta}
+                        .splitFasta( record: [id: true, sequence: true] )
+                        .collectFile { item ->
+                            [ "${item["id"]}.fa", ">" + item["id"] + '\n' +item["sequence"] ]
+                        }.map{
+                            file -> [file.baseName, file]
+                        }.combine(dataset_sequence_mapping, by:0)
+                        .map{
+                            id, file, meta -> [[id:id, dataset:meta.id], file]
+                        }.set{ch_samplesheet}
     }
 
-    ch_samplesheet.view()
-
     emit:
     samplesheet = ch_samplesheet
     versions    = ch_versions

From 66968b9c6d02e86672c39151fd9ba402593768de Mon Sep 17 00:00:00 2001
From: luisas <luisa.santus95@gmail.com>
Date: Fri, 25 Oct 2024 17:10:39 +0200
Subject: [PATCH 03/28] update

---
 conf/test_split_fasta.config                  |  5 +--
 .../utils_nfcore_proteinfold_pipeline/main.nf | 40 +++++++++++++------
 2 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/conf/test_split_fasta.config b/conf/test_split_fasta.config
index 44130987..9eca8853 100644
--- a/conf/test_split_fasta.config
+++ b/conf/test_split_fasta.config
@@ -24,16 +24,15 @@ params {
     config_profile_description = 'Minimal test dataset to check pipeline function'
 
     // Input data to test colabfold with the colabfold webserver analysis
-    mode             = 'colabold'
+    mode             = 'colabfold'
     colabfold_server = 'local'
     split_fasta      = true
     colabfold_db     = "${projectDir}/assets/dummy_db_dir"
     input          = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet_multimer.csv'
-    //input            = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.0/samplesheet_test_af2.csv'
 }
 
 process {
     withName: 'MMSEQS_COLABFOLDSEARCH|COLABFOLD_BATCH' {
         container = 'biocontainers/gawk:5.1.0'
     }
-}
+}
\ No newline at end of file
diff --git a/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf
index c3642f50..faf0b3ff 100644
--- a/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf
@@ -66,25 +66,20 @@ workflow PIPELINE_INITIALISATION {
     // Create channel from input file provided through params.input
     //
     ch_samplesheet = Channel.fromList(samplesheetToList(params.input, "assets/schema_input.json"))
-
     if (params.split_fasta) {
-        // Extract all sequence headers from the fasta file
-        // to keep track of which sequences belong to which dataset
-        // and create a new channel [[id:{dataset_id}, sequence:{sequence_id}]]
-        ch_samplesheet.splitFasta(by:1, record: [header:true])
-                      .map{meta, record -> [record.header, meta]}
-                      .set{dataset_sequence_mapping}
+        // here we have to validate that the ids are unique and valid as an extra step
+        // since it is not done with the samplesheet schema (they are all in the same file)
+        ch_samplesheet.map { meta, fasta ->
+            validateFasta(fasta)
+        }
 
         // Split the fasta file into individual files for each sequence
         ch_samplesheet.map{ meta,fasta -> fasta}
-                        .splitFasta( record: [id: true, sequence: true] )
+                        .splitFasta( record: [header: true, sequence: true] )
                         .collectFile { item ->
-                            [ "${item["id"]}.fa", ">" + item["id"] + '\n' +item["sequence"] ]
+                            [ "${cleanHeader(item["header"])}.fa", ">" + cleanHeader(item["header"]) + '\n' +item["sequence"] ]
                         }.map{
-                            file -> [file.baseName, file]
-                        }.combine(dataset_sequence_mapping, by:0)
-                        .map{
-                            id, file, meta -> [[id:id, dataset:meta.id], file]
+                            file -> [[id: file.baseName], file]
                         }.set{ch_samplesheet}
     }
 
@@ -235,3 +230,22 @@ def methodsDescriptionText(mqc_methods_yaml) {
     return description_html.toString()
 }
 
+// Make a FASTA header safe to use as an id / file name: spaces and tabs become "_", "," and ";" are dropped.
+def cleanHeader(header) {
+    return header.replaceAll(/[ \t]/, "_").replaceAll(/[,;]/, "")
+}
+
+// Validate a FASTA file (assumed to be a file path): throw on duplicate headers, warn on headers cleanHeader() rewrites.
+def validateFasta(fasta) {
+    // read the file's lines; findAll on the path object itself would not look at the file content
+    def headers = fasta.readLines().findAll { it.startsWith('>') }
+    // if headers are not unique, throw an error (unique(false) leaves `headers` unmodified)
+    if (headers.size() != headers.unique(false).size()) {
+        throw new Exception("Invalid FASTA file. The headers are not unique.")
+    }
+    headers.each { header ->
+        if (header =~ /[ \t;,]/) {
+            log.warn "The header ${header} contains special characters. They have been automatically replaced or removed."
+        }
+    }
+}
\ No newline at end of file

From a2ab2cedc1532da32c2f1168a8691208a9a216ed Mon Sep 17 00:00:00 2001
From: luisas <luisa.santus95@gmail.com>
Date: Fri, 25 Oct 2024 17:18:20 +0200
Subject: [PATCH 04/28] revert main.nf

---
 main.nf | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/main.nf b/main.nf
index 34c1f7e0..eaf0eac1 100644
--- a/main.nf
+++ b/main.nf
@@ -64,7 +64,6 @@ workflow NFCORE_PROTEINFOLD {
     ch_multiqc      = Channel.empty()
     ch_versions     = Channel.empty()
     ch_report_input = Channel.empty()
-    ch_outputsheet  = Channel.empty()
 
     //
     // WORKFLOW: Run alphafold2
@@ -147,6 +146,7 @@ workflow NFCORE_PROTEINFOLD {
             params.create_colabfold_index
         )
         ch_versions = ch_versions.mix(PREPARE_COLABFOLD_DBS.out.versions)
+
         //
         // WORKFLOW: Run nf-core/colabfold workflow
         //
@@ -159,7 +159,6 @@ workflow NFCORE_PROTEINFOLD {
             PREPARE_COLABFOLD_DBS.out.uniref30,
             params.num_recycles_colabfold
         )
-
         ch_multiqc  = COLABFOLD.out.multiqc_report
         ch_versions = ch_versions.mix(COLABFOLD.out.versions)
         ch_report_input = ch_report_input.mix(
@@ -169,8 +168,6 @@ workflow NFCORE_PROTEINFOLD {
                 .join(COLABFOLD.out.msa)
                 .map { it[0]["model"] = "colabfold"; it }
         )
-        // ch_outputsheet = ch_report_input.transpose(by:1).filter{it[1].name.contains("rank_01")}
-        // ch_outputsheet.view()
     }
 
     //
@@ -234,7 +231,6 @@ workflow NFCORE_PROTEINFOLD {
         )
     }
 
-
     emit:
     multiqc_report = ch_multiqc
 }

From 91dead17fbfa29ddc937fada5677b377efb6a9e4 Mon Sep 17 00:00:00 2001
From: luisas <luisa.santus95@gmail.com>
Date: Fri, 25 Oct 2024 17:28:49 +0200
Subject: [PATCH 05/28] fix output folder

---
 conf/modules_alphafold2.config | 12 ++++++------
 conf/modules_colabfold.config  |  8 ++++----
 conf/modules_esmfold.config    |  6 +++---
 docs/output.md                 | 15 ++++++---------
 4 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/conf/modules_alphafold2.config b/conf/modules_alphafold2.config
index c8b4fab3..b9deab54 100644
--- a/conf/modules_alphafold2.config
+++ b/conf/modules_alphafold2.config
@@ -40,14 +40,14 @@ if (params.alphafold2_mode == 'standard') {
                 params.max_template_date ? "--max_template_date ${params.max_template_date}" : ''
             ].join(' ').trim()
             publishDir = [
-                [   
-                    path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}/complete_results" },
+                [
+                    path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}" },
                     mode: 'copy',
                     saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
                     pattern: '*.*'
                 ],
                 [
-                    path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}" },
+                    path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}/best_structures" },
                     mode: 'copy',
                     saveAs: { "${meta.id}.pdb" },
                     pattern: '*.1.alphafold.pdb'
@@ -63,7 +63,7 @@ if (params.alphafold2_mode == 'split_msa_prediction') {
         withName: 'RUN_ALPHAFOLD2_MSA' {
             ext.args =  params.max_template_date ? "--max_template_date ${params.max_template_date}" : ''
             publishDir = [
-                path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}" },
+                path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}" },
                 mode: 'copy',
                 saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
             ]
@@ -74,13 +74,13 @@ if (params.alphafold2_mode == 'split_msa_prediction') {
             ext.args   = params.use_gpu ? '--use_gpu_relax=true' : '--use_gpu_relax=false'
             publishDir = [
                 [   
-                    path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}/complete_results" },
+                    path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}" },
                     mode: 'copy',
                     saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
                     pattern: '*.*'
                 ],
                 [
-                    path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}" },
+                    path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}/best_structures" },
                     mode: 'copy',
                     saveAs: { "${meta.id}.pdb" },
                     pattern: 'ranked_0.pdb'
diff --git a/conf/modules_colabfold.config b/conf/modules_colabfold.config
index ecf87d75..922a3da5 100644
--- a/conf/modules_colabfold.config
+++ b/conf/modules_colabfold.config
@@ -31,13 +31,13 @@ if (params.colabfold_server == 'webserver') {
             ].join(' ').trim()
             publishDir = [
                 [
-                    path: { "${params.outdir}/colabfold_${params.colabfold_server}/complete_results" },
+                    path: { "${params.outdir}/colabfold_${params.colabfold_server}" },
                     mode: 'copy',
                     saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
                     pattern: '*.*'
                 ],
                 [
-                    path: { "${params.outdir}/colabfold_${params.colabfold_server}" },
+                    path: { "${params.outdir}/colabfold_${params.colabfold_server}/best_structures" },
                     mode: 'copy',
                     saveAs: { "${meta.id}.pdb" },
                     pattern: '*_relaxed_rank_01.pdb'
@@ -76,13 +76,13 @@ if (params.colabfold_server == 'local') {
             ].join(' ').trim()
             publishDir = [
                 [
-                    path: { "${params.outdir}/colabfold_${params.colabfold_server}/complete_results" },
+                    path: { "${params.outdir}/colabfold_${params.colabfold_server}" },
                     mode: 'copy',
                     saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
                     pattern: '*.*'
                 ],
                 [
-                    path: { "${params.outdir}/colabfold_${params.colabfold_server}" },
+                    path: { "${params.outdir}/colabfold_${params.colabfold_server}/best_structures" },
                     mode: 'copy',
                     saveAs: { "${meta.id}.pdb" },
                     pattern: '*_relaxed_rank_01.pdb'
diff --git a/conf/modules_esmfold.config b/conf/modules_esmfold.config
index ba523450..92c2405a 100644
--- a/conf/modules_esmfold.config
+++ b/conf/modules_esmfold.config
@@ -15,13 +15,13 @@ process {
         ext.args = {params.use_gpu ? '' : '--cpu-only'}
         publishDir = [
             [
-                path: { "${params.outdir}/esmfold/complete_results" },
+                path: { "${params.outdir}/esmfold" },
                 mode: 'copy',
                 saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
-                pattern: '*.tsv'
+                pattern: '*'
             ],
             [
-                path: { "${params.outdir}/esmfold" },
+                path: { "${params.outdir}/esmfold/best_structures" },
                 mode: 'copy',
                 pattern: '*.pdb'
             ]
diff --git a/docs/output.md b/docs/output.md
index 291614a5..faa7da7f 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -23,9 +23,8 @@ The directories listed below will be created in the output directory after the p
 <details markdown="1">
 <summary>Output files</summary>
 
-- `alphafold_standard/` or `alphafold_split_msa_prediction/` based on the selected mode.
-  - `complete_results/` that contains the computed MSAs, unrelaxed structures, relaxed structures, ranked structures, raw model outputs, prediction metadata, and section timings. Specifically, `<SEQUENCE NAME>_plddt_mqc.tsv` presents the pLDDT scores per residue for each of the 5 predicted models.
-  - `<SEQUENCE NAME>.pdb` that is the structure with the highest pLDDT score (ranked first)
+- `alphafold_standard/` or `alphafold_split_msa_prediction/` based on the selected mode. Contain the computed MSAs, unrelaxed structures, relaxed structures, ranked structures, raw model outputs, prediction metadata, and section timings. Specifically, `<SEQUENCE NAME>_plddt_mqc.tsv` presents the pLDDT scores per residue for each of the 5 predicted models.
+  - `best_structures/<SEQUENCE NAME>.pdb` that is the structure with the highest pLDDT score per input (ranked first)
 - `DBs/` that contains symbolic links to the downloaded database and parameter files
 
 </details>
@@ -90,9 +89,8 @@ Below you can find an indicative example of the TSV file with the pLDDT scores p
 <details markdown="1">
 <summary>Output files</summary>
 
-- `colabfold_webserver/` or `colabfold_local/` based on the selected mode.
-  - `complete_results/` that contains the computed MSAs, unrelaxed structures, relaxed structures, ranked structures, raw model outputs, prediction metadata, and section timings. Specifically, `<SEQUENCE NAME>_plddt_mqc.tsv` presents the pLDDT scores per residue for each of the 5 predicted models.
-  - `<SEQUENCE NAME>.pdb` that is the structure with the highest pLDDT score (ranked first)
+- `colabfold_webserver/` or `colabfold_local/` based on the selected mode. Contain the computed MSAs, unrelaxed structures, relaxed structures, ranked structures, raw model outputs, prediction metadata, and section timings. Specifically, `<SEQUENCE NAME>_plddt_mqc.tsv` presents the pLDDT scores per residue for each of the 5 predicted models.
+  - `best_structures/<SEQUENCE NAME>.pdb` that is the structure with the highest pLDDT score per input (ranked first)
 - `DBs/` that contains symbolic links to the downloaded database and parameter files
 
 </details>
@@ -117,9 +115,8 @@ Below you can find some indicative examples of the output images produced by Col
 <summary>Output files</summary>
 
 - `esmfold/`
-  - `<SEQUENCE NAME>.pdb` that is the structure with the highest pLDDT score (ranked first)
-  - `complete_results`
-    - `<SEQUENCE NAME>_plddt_mqc.tsv` that presents the pLDDT scores per residue.
+  contains the predicted structures. 
+  - `best_structures/<SEQUENCE NAME>.pdb` that is the structure with the highest pLDDT score per input (ranked first)
 - `DBs/` that contains symbolic links to the downloaded database and parameter files
 
 </details>

From a22a92d8d25afaec9700dffa933fab793bce9d3a Mon Sep 17 00:00:00 2001
From: luisas <luisa.santus95@gmail.com>
Date: Fri, 25 Oct 2024 17:44:01 +0200
Subject: [PATCH 06/28] Remove trailing whitespace and add missing EOF newlines

---
 conf/modules_alphafold2.config                              | 2 +-
 conf/test_split_fasta.config                                | 2 +-
 .../local/utils_nfcore_proteinfold_pipeline/main.nf         | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/conf/modules_alphafold2.config b/conf/modules_alphafold2.config
index b9deab54..c27defbd 100644
--- a/conf/modules_alphafold2.config
+++ b/conf/modules_alphafold2.config
@@ -73,7 +73,7 @@ if (params.alphafold2_mode == 'split_msa_prediction') {
             if(params.use_gpu) { accelerator = 1 }
             ext.args   = params.use_gpu ? '--use_gpu_relax=true' : '--use_gpu_relax=false'
             publishDir = [
-                [   
+                [
                     path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}" },
                     mode: 'copy',
                     saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
diff --git a/conf/test_split_fasta.config b/conf/test_split_fasta.config
index 9eca8853..b7e5ead0 100644
--- a/conf/test_split_fasta.config
+++ b/conf/test_split_fasta.config
@@ -35,4 +35,4 @@ process {
     withName: 'MMSEQS_COLABFOLDSEARCH|COLABFOLD_BATCH' {
         container = 'biocontainers/gawk:5.1.0'
     }
-}
\ No newline at end of file
+}
diff --git a/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf
index faf0b3ff..6611eefe 100644
--- a/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf
@@ -235,17 +235,17 @@ def cleanHeader(header) {
 }
 
 def validateFasta(fasta) {
-    // extract headers 
+    // extract headers
     def headers = fasta.findAll { it.startsWith('>') }
     // if headers are not unique, throw an error
     if (headers.size() != headers.unique().size()) {
         throw new Exception("Invalid FASTA file. The headers are not unique.")
     }
-    // check headers that are malformed 
+    // check headers that are malformed
     headers.each { header ->
         if (header =~ /[ \t;,]/) {
             // warn user that the header contains special characters
             log.warn "The header ${header} contains special characters. They have been automatically removed."
         }
     }
-}
\ No newline at end of file
+}

From 58a19c891ccfd2d95489c9ea62430c6fb52dd17a Mon Sep 17 00:00:00 2001
From: luisas <luisa.santus95@gmail.com>
Date: Fri, 25 Oct 2024 17:45:40 +0200
Subject: [PATCH 07/28] fix linting

---
 docs/output.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/output.md b/docs/output.md
index faa7da7f..cc74c1d0 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -115,7 +115,7 @@ Below you can find some indicative examples of the output images produced by Col
 <summary>Output files</summary>
 
 - `esmfold/`
-  contains the predicted structures. 
+  contains the predicted structures.
   - `best_structures/<SEQUENCE NAME>.pdb` that is the structure with the highest pLDDT score per input (ranked first)
 - `DBs/` that contains symbolic links to the downloaded database and parameter files
 

From 250095188c7c7f7c10e7bfefb8cde810878db5de Mon Sep 17 00:00:00 2001
From: Luisa Santus <luisa.santus95@gmail.com>
Date: Mon, 18 Nov 2024 10:24:20 +0100
Subject: [PATCH 08/28] Update conf/modules_alphafold2.config

Co-authored-by: Jose Espinosa-Carrasco <kadomu@gmail.com>
---
 conf/modules_alphafold2.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/modules_alphafold2.config b/conf/modules_alphafold2.config
index c27defbd..90031abd 100644
--- a/conf/modules_alphafold2.config
+++ b/conf/modules_alphafold2.config
@@ -47,7 +47,7 @@ if (params.alphafold2_mode == 'standard') {
                     pattern: '*.*'
                 ],
                 [
-                    path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}/best_structures" },
+                    path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}/top_ranked_structures" },
                     mode: 'copy',
                     saveAs: { "${meta.id}.pdb" },
                     pattern: '*.1.alphafold.pdb'

From ace5aef6c384a87e66a4e7e83e445abae1c61a4e Mon Sep 17 00:00:00 2001
From: Luisa Santus <luisa.santus95@gmail.com>
Date: Mon, 18 Nov 2024 10:24:28 +0100
Subject: [PATCH 09/28] Update conf/modules_alphafold2.config

Co-authored-by: Jose Espinosa-Carrasco <kadomu@gmail.com>
---
 conf/modules_alphafold2.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/modules_alphafold2.config b/conf/modules_alphafold2.config
index 90031abd..cfc063a7 100644
--- a/conf/modules_alphafold2.config
+++ b/conf/modules_alphafold2.config
@@ -80,7 +80,7 @@ if (params.alphafold2_mode == 'split_msa_prediction') {
                     pattern: '*.*'
                 ],
                 [
-                    path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}/best_structures" },
+                    path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}/top_ranked_structures" },
                     mode: 'copy',
                     saveAs: { "${meta.id}.pdb" },
                     pattern: 'ranked_0.pdb'

From b32702e050d82e1034d8161a453c995c0b64549b Mon Sep 17 00:00:00 2001
From: Luisa Santus <luisa.santus95@gmail.com>
Date: Mon, 18 Nov 2024 10:24:38 +0100
Subject: [PATCH 10/28] Update conf/modules_colabfold.config

Co-authored-by: Jose Espinosa-Carrasco <kadomu@gmail.com>
---
 conf/modules_colabfold.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/modules_colabfold.config b/conf/modules_colabfold.config
index 922a3da5..c1d89c7c 100644
--- a/conf/modules_colabfold.config
+++ b/conf/modules_colabfold.config
@@ -37,7 +37,7 @@ if (params.colabfold_server == 'webserver') {
                     pattern: '*.*'
                 ],
                 [
-                    path: { "${params.outdir}/colabfold_${params.colabfold_server}/best_structures" },
+                    path: { "${params.outdir}/colabfold_${params.colabfold_server}/top_ranked_structures" },
                     mode: 'copy',
                     saveAs: { "${meta.id}.pdb" },
                     pattern: '*_relaxed_rank_01.pdb'

From 47b252f8317da60c1fcbf262a636526a0d6daf59 Mon Sep 17 00:00:00 2001
From: Luisa Santus <luisa.santus95@gmail.com>
Date: Mon, 18 Nov 2024 10:24:45 +0100
Subject: [PATCH 11/28] Update conf/modules_colabfold.config

Co-authored-by: Jose Espinosa-Carrasco <kadomu@gmail.com>
---
 conf/modules_colabfold.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/modules_colabfold.config b/conf/modules_colabfold.config
index c1d89c7c..cfd1b7a1 100644
--- a/conf/modules_colabfold.config
+++ b/conf/modules_colabfold.config
@@ -82,7 +82,7 @@ if (params.colabfold_server == 'local') {
                     pattern: '*.*'
                 ],
                 [
-                    path: { "${params.outdir}/colabfold_${params.colabfold_server}/best_structures" },
+                    path: { "${params.outdir}/colabfold_${params.colabfold_server}/top_ranked_structures" },
                     mode: 'copy',
                     saveAs: { "${meta.id}.pdb" },
                     pattern: '*_relaxed_rank_01.pdb'

From d6d798e67f58289dd5ba0115f17163d10eb6561e Mon Sep 17 00:00:00 2001
From: Luisa Santus <luisa.santus95@gmail.com>
Date: Mon, 18 Nov 2024 10:24:52 +0100
Subject: [PATCH 12/28] Update conf/modules_esmfold.config

Co-authored-by: Jose Espinosa-Carrasco <kadomu@gmail.com>
---
 conf/modules_esmfold.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/modules_esmfold.config b/conf/modules_esmfold.config
index 92c2405a..9af290fc 100644
--- a/conf/modules_esmfold.config
+++ b/conf/modules_esmfold.config
@@ -21,7 +21,7 @@ process {
                 pattern: '*'
             ],
             [
-                path: { "${params.outdir}/esmfold/best_structures" },
+                path: { "${params.outdir}/esmfold/top_ranked_structures" },
                 mode: 'copy',
                 pattern: '*.pdb'
             ]

From b987430625bc54f9d0cb677b2bee8ab117559548 Mon Sep 17 00:00:00 2001
From: Luisa Santus <luisa.santus95@gmail.com>
Date: Mon, 18 Nov 2024 10:25:06 +0100
Subject: [PATCH 13/28] Update docs/output.md

Co-authored-by: Jose Espinosa-Carrasco <kadomu@gmail.com>
---
 docs/output.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/output.md b/docs/output.md
index cc74c1d0..fef008eb 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -24,7 +24,7 @@ The directories listed below will be created in the output directory after the p
 <summary>Output files</summary>
 
 - `alphafold_standard/` or `alphafold_split_msa_prediction/` based on the selected mode. Contain the computed MSAs, unrelaxed structures, relaxed structures, ranked structures, raw model outputs, prediction metadata, and section timings. Specifically, `<SEQUENCE NAME>_plddt_mqc.tsv` presents the pLDDT scores per residue for each of the 5 predicted models.
-  - `best_structures/<SEQUENCE NAME>.pdb` that is the structure with the highest pLDDT score per input (ranked first)
+  - `top_ranked_structures/<SEQUENCE NAME>.pdb` that is the structure with the highest pLDDT score per input (ranked first)
 - `DBs/` that contains symbolic links to the downloaded database and parameter files
 
 </details>

From 1a09418690a9534d7bf1d0152b594e82e026cbe2 Mon Sep 17 00:00:00 2001
From: Luisa Santus <luisa.santus95@gmail.com>
Date: Mon, 18 Nov 2024 10:25:17 +0100
Subject: [PATCH 14/28] Update docs/usage.md

Co-authored-by: Jose Espinosa-Carrasco <kadomu@gmail.com>
---
 docs/usage.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/usage.md b/docs/usage.md
index d4502203..8f3f5105 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -35,7 +35,7 @@ The samplesheet can have as many columns as you desire, however, there is a stri
 
 An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
 
-Each FASTA file is assumed to contain a single protein sequence unless you are using multimer mode. If you want to provide a FASTA file with multiple protein sequences, each to be folded individually, you can supply one or more FASTA files containing one or more sequences and use the --split_fasta parameter. In this case, each sequence in the FASTA file will be folded individually and in parallel, as if you had listed each sequence separately in the samplesheet.
+Each FASTA file should contain a single protein sequence unless using multimer mode. To provide a FASTA file with multiple sequences for individual folding, you can use one or more FASTA files with the --split_fasta parameter. This will treat each sequence in the FASTA file as a separate entry, folding them individually and in parallel, as if each sequence were listed separately in the samplesheet.
 
 ## Running the pipeline
 

From 1bf0d3b17f2eb2b5f8252cc6f3016e2e7abb0368 Mon Sep 17 00:00:00 2001
From: Luisa Santus <luisa.santus95@gmail.com>
Date: Mon, 18 Nov 2024 10:25:33 +0100
Subject: [PATCH 15/28] Update nextflow_schema.json

Co-authored-by: Jose Espinosa-Carrasco <kadomu@gmail.com>
---
 nextflow_schema.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 8df979ce..d8191d6c 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -42,7 +42,7 @@
                 },
                 "split_fasta": {
                     "type": "boolean",
-                    "description": "Split input fasta file in multiple fasta files each of them containing one sequence to be folded",
+                    "description": "Split input multi-fasta file in separated fasta files each of them containing one sequence to be folded",
                     "fa_icon": "fas fa-microchip"
                 },
                 "email": {

From c8de3181c80deb180e2e801d10a08b57fce1d516 Mon Sep 17 00:00:00 2001
From: luisas <luisa.santus95@gmail.com>
Date: Mon, 18 Nov 2024 11:25:39 +0100
Subject: [PATCH 16/28] update config af2

---
 conf/modules_alphafold2.config | 4 ++--
 conf/modules_esmfold.config    | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/conf/modules_alphafold2.config b/conf/modules_alphafold2.config
index cfc063a7..18e1022b 100644
--- a/conf/modules_alphafold2.config
+++ b/conf/modules_alphafold2.config
@@ -50,7 +50,7 @@ if (params.alphafold2_mode == 'standard') {
                     path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}/top_ranked_structures" },
                     mode: 'copy',
                     saveAs: { "${meta.id}.pdb" },
-                    pattern: '*.1.alphafold.pdb'
+                    pattern: '*_alphafold2.pdb'
                 ]
             ]
         }
@@ -83,7 +83,7 @@ if (params.alphafold2_mode == 'split_msa_prediction') {
                     path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}/top_ranked_structures" },
                     mode: 'copy',
                     saveAs: { "${meta.id}.pdb" },
-                    pattern: 'ranked_0.pdb'
+                    pattern: '*_alphafold2.pdb'
                 ]
             ]
         }
diff --git a/conf/modules_esmfold.config b/conf/modules_esmfold.config
index 9af290fc..30c80772 100644
--- a/conf/modules_esmfold.config
+++ b/conf/modules_esmfold.config
@@ -23,6 +23,7 @@ process {
             [
                 path: { "${params.outdir}/esmfold/top_ranked_structures" },
                 mode: 'copy',
+                saveAs: { "${meta.id}.pdb" },
                 pattern: '*.pdb'
             ]
         ]

From b53fa81e37a97f04a009c6f15a379439dfbbc726 Mon Sep 17 00:00:00 2001
From: Luisa Santus <luisa.santus95@gmail.com>
Date: Thu, 21 Nov 2024 14:24:25 +0000
Subject: [PATCH 17/28] Remove GENERATE_REPORT publishDir block from modules.config

---
 conf/modules.config | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 5f6fbd9f..c56b11eb 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -50,14 +50,6 @@ process {
         ]
     }
 
-    withName: 'GENERATE_REPORT'{
-        publishDir = [
-            path: { "${params.outdir}/report" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-        ]
-    }
-
     withName: 'FOLDSEEK_EASYSEARCH' {
         ext.args   = { params.foldseek_easysearch_arg ? "$params.foldseek_easysearch_arg" : "--format-mode 3" }
         publishDir = [

From 673d30417b266ef393878be3488aa756d1b8925d Mon Sep 17 00:00:00 2001
From: luisas <lsantus@login1.linux.crg.es>
Date: Thu, 21 Nov 2024 17:58:09 +0100
Subject: [PATCH 18/28] Update ColabFold top-ranked structure pattern to rank_001

---
 conf/modules_colabfold.config | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/conf/modules_colabfold.config b/conf/modules_colabfold.config
index cfd1b7a1..21021900 100644
--- a/conf/modules_colabfold.config
+++ b/conf/modules_colabfold.config
@@ -40,7 +40,7 @@ if (params.colabfold_server == 'webserver') {
                     path: { "${params.outdir}/colabfold_${params.colabfold_server}/top_ranked_structures" },
                     mode: 'copy',
                     saveAs: { "${meta.id}.pdb" },
-                    pattern: '*_relaxed_rank_01.pdb'
+                    pattern: '*_relaxed_rank_001*.pdb'
                 ]
             ]
         }
@@ -85,7 +85,7 @@ if (params.colabfold_server == 'local') {
                     path: { "${params.outdir}/colabfold_${params.colabfold_server}/top_ranked_structures" },
                     mode: 'copy',
                     saveAs: { "${meta.id}.pdb" },
-                    pattern: '*_relaxed_rank_01.pdb'
+                    pattern: '*_relaxed_rank_001*.pdb'
                 ],
             ]
         }

From 3f22c4291c2a3a47a33997e6076653a7f62e269d Mon Sep 17 00:00:00 2001
From: Luisa Santus <luisa.santus95@gmail.com>
Date: Tue, 26 Nov 2024 11:08:42 +0100
Subject: [PATCH 19/28] Update conf/test_split_fasta.config

Co-authored-by: Jose Espinosa-Carrasco <kadomu@gmail.com>
---
 conf/test_split_fasta.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/test_split_fasta.config b/conf/test_split_fasta.config
index b7e5ead0..a1c3c683 100644
--- a/conf/test_split_fasta.config
+++ b/conf/test_split_fasta.config
@@ -28,7 +28,7 @@ params {
     colabfold_server = 'local'
     split_fasta      = true
     colabfold_db     = "${projectDir}/assets/dummy_db_dir"
-    input          = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet_multimer.csv'
+    input            = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet_multimer.csv'
 }
 
 process {

From 4e748ad97031b3946649f46169208d3dc94ccfdb Mon Sep 17 00:00:00 2001
From: Luisa Santus <luisa.santus95@gmail.com>
Date: Tue, 26 Nov 2024 11:09:14 +0100
Subject: [PATCH 20/28] Update
 subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf

Co-authored-by: Jose Espinosa-Carrasco <kadomu@gmail.com>
---
 subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf
index 6611eefe..2b57012b 100644
--- a/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf
@@ -67,7 +67,7 @@ workflow PIPELINE_INITIALISATION {
     //
     ch_samplesheet = Channel.fromList(samplesheetToList(params.input, "assets/schema_input.json"))
     if (params.split_fasta) {
-        // here we have to validate that the ids are unique and valid as an extra step
+        // TODO: here we have to validate that the ids are unique and valid as an extra step
         // since it is not done with the samplesheet schema (they are all in the same file)
         ch_samplesheet.map { meta, fasta ->
             validateFasta(fasta)

From 15ac126a289d80792da886032ea21b6030a0e5e2 Mon Sep 17 00:00:00 2001
From: Luisa Santus <luisa.santus95@gmail.com>
Date: Tue, 26 Nov 2024 11:10:21 +0100
Subject: [PATCH 21/28] Update
 subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf

Co-authored-by: Jose Espinosa-Carrasco <kadomu@gmail.com>
---
 .../utils_nfcore_proteinfold_pipeline/main.nf   | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf
index 2b57012b..c9bd0d57 100644
--- a/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf
@@ -74,13 +74,16 @@ workflow PIPELINE_INITIALISATION {
         }
 
         // Split the fasta file into individual files for each sequence
-        ch_samplesheet.map{ meta,fasta -> fasta}
-                        .splitFasta( record: [header: true, sequence: true] )
-                        .collectFile { item ->
-                            [ "${cleanHeader(item["header"])}.fa", ">" + cleanHeader(item["header"]) + '\n' +item["sequence"] ]
-                        }.map{
-                            file -> [[id: file.baseName], file]
-                        }.set{ch_samplesheet}
+        ch_samplesheet
+            .map { meta,fasta -> fasta }
+            .splitFasta( record: [header: true, sequence: true] )
+            .collectFile { item ->
+                [ "${cleanHeader(item["header"])}.fa", ">" + cleanHeader(item["header"]) + '\n' +item["sequence"] ]
+            }
+            .map {
+                file -> [[id: file.baseName], file]
+            }
+            .set { ch_samplesheet }
     }
 
     emit:

From c80c6c18bd06ee201e77c299283e0f58796cf284 Mon Sep 17 00:00:00 2001
From: Luisa Santus <luisa.santus95@gmail.com>
Date: Tue, 26 Nov 2024 11:10:32 +0100
Subject: [PATCH 22/28] Update docs/usage.md

Co-authored-by: Jose Espinosa-Carrasco <kadomu@gmail.com>
---
 docs/usage.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/usage.md b/docs/usage.md
index 5ac9cb66..3ac88ecd 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -35,7 +35,7 @@ The samplesheet can have as many columns as you desire, however, there is a stri
 
 An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
 
-Each FASTA file should contain a single protein sequence unless using multimer mode. To provide a FASTA file with multiple sequences for individual folding, you can use one or more FASTA files with the --split_fasta parameter. This will treat each sequence in the FASTA file as a separate entry, folding them individually and in parallel, as if each sequence were listed separately in the samplesheet.
+Each FASTA file should contain a single protein sequence unless using multimer mode. To provide a FASTA file with multiple sequences for individual folding, you can use one or more FASTA files with the `--split_fasta` parameter. This will treat each sequence in the FASTA file as a separate entry, folding them individually and in parallel, as if each sequence were listed separately in the samplesheet.
 
 ## Running the pipeline
 

From 5e97599fefb0a10c3ace18023380742f1c5fc3ff Mon Sep 17 00:00:00 2001
From: Luisa Santus <luisa.santus95@gmail.com>
Date: Tue, 26 Nov 2024 11:10:44 +0100
Subject: [PATCH 23/28] Update docs/output.md

Co-authored-by: Jose Espinosa-Carrasco <kadomu@gmail.com>
---
 docs/output.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/output.md b/docs/output.md
index fef008eb..c9f35bd3 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -23,7 +23,7 @@ The directories listed below will be created in the output directory after the p
 <details markdown="1">
 <summary>Output files</summary>
 
-- `alphafold_standard/` or `alphafold_split_msa_prediction/` based on the selected mode. Contain the computed MSAs, unrelaxed structures, relaxed structures, ranked structures, raw model outputs, prediction metadata, and section timings. Specifically, `<SEQUENCE NAME>_plddt_mqc.tsv` presents the pLDDT scores per residue for each of the 5 predicted models.
+- `alphafold2_standard/` or `alphafold2_split_msa_prediction/` based on the selected mode. It contains the computed MSAs, unrelaxed structures, relaxed structures, ranked structures, raw model outputs, prediction metadata, and section timings. Specifically, `<SEQUENCE NAME>_plddt_mqc.tsv` presents the pLDDT scores per residue for each of the 5 predicted models.
   - `top_ranked_structures/<SEQUENCE NAME>.pdb` that is the structure with the highest pLDDT score per input (ranked first)
 - `DBs/` that contains symbolic links to the downloaded database and parameter files
 

From f88c9dfce66449e7076f90e058a4b2ceb3b2cfdb Mon Sep 17 00:00:00 2001
From: Luisa Santus <luisa.santus95@gmail.com>
Date: Tue, 26 Nov 2024 10:11:54 +0000
Subject: [PATCH 24/28] fix leftover

---
 conf/modules_esmfold.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/modules_esmfold.config b/conf/modules_esmfold.config
index 30c80772..967d77f7 100644
--- a/conf/modules_esmfold.config
+++ b/conf/modules_esmfold.config
@@ -18,7 +18,7 @@ process {
                 path: { "${params.outdir}/esmfold" },
                 mode: 'copy',
                 saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
-                pattern: '*'
+                pattern: '*.*'
             ],
             [
                 path: { "${params.outdir}/esmfold/top_ranked_structures" },

From ec35c2b31d116e42d877cb4b55fd47c5f92a3149 Mon Sep 17 00:00:00 2001
From: Luisa Santus <luisa.santus95@gmail.com>
Date: Tue, 26 Nov 2024 10:13:45 +0000
Subject: [PATCH 25/28] update samplesheet

---
 assets/samplesheet.csv | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 assets/samplesheet.csv

diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv
new file mode 100644
index 00000000..b458d604
--- /dev/null
+++ b/assets/samplesheet.csv
@@ -0,0 +1,3 @@
+id,fasta
+T1024,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1024.fasta
+T1026,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1026.fasta

From 1ffad8ff05ea0bf29e454e46de43854ba8afec65 Mon Sep 17 00:00:00 2001
From: Luisa Santus <luisa.santus95@gmail.com>
Date: Tue, 26 Nov 2024 10:20:10 +0000
Subject: [PATCH 26/28] fix review: publish results under <tool>/<mode>/

---
 conf/modules_alphafold2.config |  8 ++++----
 conf/modules_colabfold.config  |  8 ++++----
 conf/modules_esmfold.config    |  4 ++--
 docs/output.md                 | 10 +++++-----
 4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/conf/modules_alphafold2.config b/conf/modules_alphafold2.config
index 18e1022b..a12105ab 100644
--- a/conf/modules_alphafold2.config
+++ b/conf/modules_alphafold2.config
@@ -41,13 +41,13 @@ if (params.alphafold2_mode == 'standard') {
             ].join(' ').trim()
             publishDir = [
                 [
-                    path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}" },
+                    path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}" },
                     mode: 'copy',
                     saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
                     pattern: '*.*'
                 ],
                 [
-                    path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}/top_ranked_structures" },
+                    path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}/top_ranked_structures" },
                     mode: 'copy',
                     saveAs: { "${meta.id}.pdb" },
                     pattern: '*_alphafold2.pdb'
@@ -74,13 +74,13 @@ if (params.alphafold2_mode == 'split_msa_prediction') {
             ext.args   = params.use_gpu ? '--use_gpu_relax=true' : '--use_gpu_relax=false'
             publishDir = [
                 [
-                    path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}" },
+                    path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}" },
                     mode: 'copy',
                     saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
                     pattern: '*.*'
                 ],
                 [
-                    path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}/top_ranked_structures" },
+                    path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}/top_ranked_structures" },
                     mode: 'copy',
                     saveAs: { "${meta.id}.pdb" },
                     pattern: '*_alphafold2.pdb'
diff --git a/conf/modules_colabfold.config b/conf/modules_colabfold.config
index 21021900..c37214d3 100644
--- a/conf/modules_colabfold.config
+++ b/conf/modules_colabfold.config
@@ -31,13 +31,13 @@ if (params.colabfold_server == 'webserver') {
             ].join(' ').trim()
             publishDir = [
                 [
-                    path: { "${params.outdir}/colabfold_${params.colabfold_server}" },
+                    path: { "${params.outdir}/colabfold/${params.colabfold_server}" },
                     mode: 'copy',
                     saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
                     pattern: '*.*'
                 ],
                 [
-                    path: { "${params.outdir}/colabfold_${params.colabfold_server}/top_ranked_structures" },
+                    path: { "${params.outdir}/colabfold/${params.colabfold_server}/top_ranked_structures" },
                     mode: 'copy',
                     saveAs: { "${meta.id}.pdb" },
                     pattern: '*_relaxed_rank_001*.pdb'
@@ -76,13 +76,13 @@ if (params.colabfold_server == 'local') {
             ].join(' ').trim()
             publishDir = [
                 [
-                    path: { "${params.outdir}/colabfold_${params.colabfold_server}" },
+                    path: { "${params.outdir}/colabfold/${params.colabfold_server}" },
                     mode: 'copy',
                     saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
                     pattern: '*.*'
                 ],
                 [
-                    path: { "${params.outdir}/colabfold_${params.colabfold_server}/top_ranked_structures" },
+                    path: { "${params.outdir}/colabfold/${params.colabfold_server}/top_ranked_structures" },
                     mode: 'copy',
                     saveAs: { "${meta.id}.pdb" },
                     pattern: '*_relaxed_rank_001*.pdb'
diff --git a/conf/modules_esmfold.config b/conf/modules_esmfold.config
index 967d77f7..3468718f 100644
--- a/conf/modules_esmfold.config
+++ b/conf/modules_esmfold.config
@@ -15,13 +15,13 @@ process {
         ext.args = {params.use_gpu ? '' : '--cpu-only'}
         publishDir = [
             [
-                path: { "${params.outdir}/esmfold" },
+                path: { "${params.outdir}/esmfold/default" },
                 mode: 'copy',
                 saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
                 pattern: '*.*'
             ],
             [
-                path: { "${params.outdir}/esmfold/top_ranked_structures" },
+                path: { "${params.outdir}/esmfold/default/top_ranked_structures" },
                 mode: 'copy',
                 saveAs: { "${meta.id}.pdb" },
                 pattern: '*.pdb'
diff --git a/docs/output.md b/docs/output.md
index c9f35bd3..542c8140 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -23,7 +23,7 @@ The directories listed below will be created in the output directory after the p
 <details markdown="1">
 <summary>Output files</summary>
 
-- `alphafold2_standard/` or `alphafold2_split_msa_prediction/` based on the selected mode. It contains the computed MSAs, unrelaxed structures, relaxed structures, ranked structures, raw model outputs, prediction metadata, and section timings. Specifically, `<SEQUENCE NAME>_plddt_mqc.tsv` presents the pLDDT scores per residue for each of the 5 predicted models.
+- `alphafold2/standard/` or `alphafold2/split_msa_prediction/` based on the selected mode. It contains the computed MSAs, unrelaxed structures, relaxed structures, ranked structures, raw model outputs, prediction metadata, and section timings. Specifically, `<SEQUENCE NAME>_plddt_mqc.tsv` presents the pLDDT scores per residue for each of the 5 predicted models.
   - `top_ranked_structures/<SEQUENCE NAME>.pdb` that is the structure with the highest pLDDT score per input (ranked first)
 - `DBs/` that contains symbolic links to the downloaded database and parameter files
 
@@ -89,8 +89,8 @@ Below you can find an indicative example of the TSV file with the pLDDT scores p
 <details markdown="1">
 <summary>Output files</summary>
 
-- `colabfold_webserver/` or `colabfold_local/` based on the selected mode. Contain the computed MSAs, unrelaxed structures, relaxed structures, ranked structures, raw model outputs, prediction metadata, and section timings. Specifically, `<SEQUENCE NAME>_plddt_mqc.tsv` presents the pLDDT scores per residue for each of the 5 predicted models.
-  - `best_structures/<SEQUENCE NAME>.pdb` that is the structure with the highest pLDDT score per input (ranked first)
+- `colabfold/webserver/` or `colabfold/local/` based on the selected mode. Contain the computed MSAs, unrelaxed structures, relaxed structures, ranked structures, raw model outputs, prediction metadata, and section timings. Specifically, `<SEQUENCE NAME>_plddt_mqc.tsv` presents the pLDDT scores per residue for each of the 5 predicted models.
+  - `top_ranked_structures/<SEQUENCE NAME>.pdb` that is the structure with the highest pLDDT score per input (ranked first)
 - `DBs/` that contains symbolic links to the downloaded database and parameter files
 
 </details>
@@ -114,9 +114,9 @@ Below you can find some indicative examples of the output images produced by Col
 <details markdown="1">
 <summary>Output files</summary>
 
-- `esmfold/`
+- `esmfold/default`
   contains the predicted structures.
-  - `best_structures/<SEQUENCE NAME>.pdb` that is the structure with the highest pLDDT score per input (ranked first)
+  - `top_ranked_structures/<SEQUENCE NAME>.pdb` that is the structure with the highest pLDDT score per input (ranked first)
 - `DBs/` that contains symbolic links to the downloaded database and parameter files
 
 </details>

From 4ddb088f27abc31277904b81d17f8ff0a4f47e09 Mon Sep 17 00:00:00 2001
From: Jose Espinosa-Carrasco <kadomu@gmail.com>
Date: Tue, 26 Nov 2024 11:39:51 +0100
Subject: [PATCH 27/28] Update docs/output.md

---
 docs/output.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/output.md b/docs/output.md
index 542c8140..df1591b4 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -89,7 +89,7 @@ Below you can find an indicative example of the TSV file with the pLDDT scores p
 <details markdown="1">
 <summary>Output files</summary>
 
-- `colabfold/webserver/` or `colabfold/local/` based on the selected mode. Contain the computed MSAs, unrelaxed structures, relaxed structures, ranked structures, raw model outputs, prediction metadata, and section timings. Specifically, `<SEQUENCE NAME>_plddt_mqc.tsv` presents the pLDDT scores per residue for each of the 5 predicted models.
+- `colabfold/webserver/` or `colabfold/local/` based on the selected mode. It contains the computed MSAs, unrelaxed structures, relaxed structures, ranked structures, raw model outputs, prediction metadata, and section timings. Specifically, `<SEQUENCE NAME>_plddt_mqc.tsv` presents the pLDDT scores per residue for each of the 5 predicted models.
   - `top_ranked_structures/<SEQUENCE NAME>.pdb` that is the structure with the highest pLDDT score per input (ranked first)
 - `DBs/` that contains symbolic links to the downloaded database and parameter files
 

From 3742d4aa8f150557b46b19fffe0b2aaffb20beeb Mon Sep 17 00:00:00 2001
From: Luisa Santus <luisa.santus95@gmail.com>
Date: Tue, 26 Nov 2024 10:45:49 +0000
Subject: [PATCH 28/28] fix output docs: mention esmfold pLDDT TSV

---
 docs/output.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/output.md b/docs/output.md
index 542c8140..0f114a2d 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -115,7 +115,7 @@ Below you can find some indicative examples of the output images produced by Col
 <summary>Output files</summary>
 
 - `esmfold/default`
-  contains the predicted structures.
+  contains the predicted structures. Specifically, `<SEQUENCE NAME>_plddt_mqc.tsv` presents the pLDDT scores per residue for each of the predicted models.
   - `top_ranked_structures/<SEQUENCE NAME>.pdb` that is the structure with the highest pLDDT score per input (ranked first)
 - `DBs/` that contains symbolic links to the downloaded database and parameter files