Groovy babyyyyy

drpatelh · Feb 12, 2021 · 6b70a9b · 6b70a9b
1 parent 532ad3d
commit 6b70a9b
Show file tree

Hide file tree

Showing 6 changed files with 164 additions and 133 deletions.
diff --git a/lib/Checks.groovy b/lib/Checks.groovy
@@ -63,92 +63,5 @@ class Checks {
             }
         }
     }
-
-    // Citation string
-    private static String citation(workflow) {
-        return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" +
-               "* The pipeline\n" + 
-               "  https://doi.org/10.5281/zenodo.1400710\n\n" +
-               "* The nf-core framework\n" +
-               "  https://doi.org/10.1038/s41587-020-0439-x\n\n" +
-               "* Software dependencies\n" +
-               "  https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md"
-    }
-
-    // Print a warning after SRA download has completed
-    static void sra_download(log) {
-        log.warn "=============================================================================\n" +
-                 "  THIS IS AN EXPERIMENTAL FEATURE!\n\n" + 
-                 "  Please double-check the samplesheet that has been auto-created using the\n" +
-                 "  public database ids provided via the '--public_data_ids' parameter.\n\n" +
-                 "  All of the sample metadata obtained from the ENA has been appended\n" +
-                 "  as additional columns to help you manually curate the samplesheet before\n" +
-                 "  you run the main branch of the pipeline.\n" +
-                 "==================================================================================="
-    }
-
-    // Exit pipeline if incorrect --genome key provided
-    static void genome_exists(params, log) {
-        if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
-            log.error "=============================================================================\n" +
-                      "  Genome '${params.genome}' not found in any config files provided to the pipeline.\n" +
-                      "  Currently, the available genome keys are:\n" +
-                      "  ${params.genomes.keySet().join(", ")}\n" +
-                      "==================================================================================="
-            System.exit(0)
-        }
-    }
-
-    // Get attribute from genome config file e.g. fasta
-    static String get_genome_attribute(params, attribute) {
-        def val = ''
-        if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
-            if (params.genomes[ params.genome ].containsKey(attribute)) {
-                val = params.genomes[ params.genome ][ attribute ]
-            }
-        }
-        return val
-    }  
-
-    // Print warning if genome fasta has more than one sequence
-    static void is_multifasta(fasta, log) {
-        def count = 0
-        def line  = null
-        fasta.withReader { reader ->
-            while (line = reader.readLine()) {
-                if (line.contains('>')) {
-                    count++
-                    if (count > 1) {
-                        log.warn "=============================================================================\n" +
-                                "  This pipeline does not officially support multi-fasta genome files!\n\n" + 
-                                "  The parameters and processes are tailored for viral genome analysis.\n" +
-                                "  Please amend the '--fasta' parameter.\n" +
-                                "==================================================================================="
-                        break
-                    }
-                }
-            }
-        }
-    }
-
-    // Function that parses and returns the number of mapped reasds from flagstat files
-    static ArrayList get_flagstat_mapped_reads(workflow, params, log, flagstat) {
-        def mapped_reads = 0
-        flagstat.eachLine { line ->
-            if (line.contains(' mapped (')) {
-                mapped_reads = line.tokenize().first().toInteger()
-            }
-        }
 
-        def pass = false
-        def logname = flagstat.getBaseName() - 'flagstat'
-        Map colors = Headers.log_colours(params.monochrome_logs)
-        if (mapped_reads <= params.min_mapped_reads.toInteger()) {
-            log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} [FAIL] Mapped read threshold >= ${params.min_mapped_reads}. IGNORING FOR FURTHER DOWNSTREAM ANALYSIS: ${mapped_reads} - $logname${colors.reset}."
-        } else {
-            pass = true
-            //log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} [PASS] Mapped read threshold >=${params.min_mapped_reads}: ${mapped_reads} - $logname${colors.reset}."
-        }
-        return [ mapped_reads, pass ]
-    }
 }
diff --git a/lib/Schema.groovy b/lib/Schema.groovy
@@ -100,7 +100,7 @@ class Schema {
             output += "\n"
         }
         output += Headers.dashed_line(params.monochrome_logs)
-        output += "\n\n" + Checks.citation(workflow)
+        output += "\n\n" + Workflow.citation(workflow)
         output += "\n\n" + Headers.dashed_line(params.monochrome_logs)
         return output
     }
@@ -197,7 +197,7 @@ class Schema {
             }
         }
         output += Headers.dashed_line(params.monochrome_logs)
-        output += "\n\n" + Checks.citation(workflow)
+        output += "\n\n" + Workflow.citation(workflow)
         output += "\n\n" + Headers.dashed_line(params.monochrome_logs)
         return output
     }

diff --git a/lib/Workflow.groovy b/lib/Workflow.groovy
@@ -0,0 +1,141 @@
+/*
+ * This file holds several functions used to perform standard checks for the nf-core/viralrecon pipeline.
+ */
+
+class Workflow {
+
+    // Citation string (also appended by Schema, hence not declared private)
+    static String citation(workflow) {
+        return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" +
+               "* The pipeline\n" +
+               "  https://doi.org/10.5281/zenodo.1400710\n\n" +
+               "* The nf-core framework\n" +
+               "  https://doi.org/10.1038/s41587-020-0439-x\n\n" +
+               "* Software dependencies\n" +
+               "  https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md"
+    }
+
+    // Validate pipeline parameters; 'valid_params' is a map holding the permitted
+    // 'protocols', 'callers', 'assemblers' and 'spades_modes' values. Logs an error
+    // and exits with a non-zero status as soon as one check fails.
+    static void validate_params(params, log, valid_params) {
+        genome_exists(params, log)
+
+        // Generic parameter validation
+        if (!params.fasta) {
+            log.error "Genome fasta file not specified!"
+            System.exit(1)
+        }
+
+        if (!params.skip_kraken2 && !params.kraken2_db && !params.kraken2_db_name) {
+            log.error "Please specify a valid name to build Kraken2 database for host e.g. 'human'!"
+            System.exit(1)
+        }
+
+        if (!valid_params['protocols'].contains(params.protocol)) {
+            log.error "Invalid protocol option: ${params.protocol}. Valid options: ${valid_params['protocols'].join(', ')}"
+            System.exit(1)
+        }
+
+        // Variant calling parameter validation: any caller outside the valid list grows the de-duplicated union
+        def callers = params.callers ? params.callers.split(',').collect{ it.trim().toLowerCase() } : []
+        if ((valid_params['callers'] + callers).unique().size() != valid_params['callers'].size()) {
+            log.error "Invalid variant caller option: ${params.callers}. Valid options: ${valid_params['callers'].join(', ')}"
+            System.exit(1)
+        }
+
+        if (params.protocol == 'amplicon' && !params.skip_variants && !params.primer_bed) {
+            log.error "To perform variant calling in 'amplicon' mode please provide a valid primer BED file!"
+            System.exit(1)
+        }
+
+        // Assembly parameter validation (same union-size check as for the callers)
+        def assemblers = params.assemblers ? params.assemblers.split(',').collect{ it.trim().toLowerCase() } : []
+        if ((valid_params['assemblers'] + assemblers).unique().size() != valid_params['assemblers'].size()) {
+            log.error "Invalid assembler option: ${params.assemblers}. Valid options: ${valid_params['assemblers'].join(', ')}"
+            System.exit(1)
+        }
+
+        if (!valid_params['spades_modes'].contains(params.spades_mode)) {
+            log.error "Invalid spades mode option: ${params.spades_mode}. Valid options: ${valid_params['spades_modes'].join(', ')}"
+            System.exit(1)
+        }
+    }
+
+    // Print a warning after SRA download has completed
+    static void sra_download(log) {
+        log.warn "=============================================================================\n" +
+                 "  THIS IS AN EXPERIMENTAL FEATURE!\n\n" +
+                 "  Please double-check the samplesheet that has been auto-created using the\n" +
+                 "  public database ids provided via the '--public_data_ids' parameter.\n\n" +
+                 "  All of the sample metadata obtained from the ENA has been appended\n" +
+                 "  as additional columns to help you manually curate the samplesheet before\n" +
+                 "  you run the main branch of the pipeline.\n" +
+                 "==================================================================================="
+    }
+
+    // Exit pipeline with a non-zero status if an unknown --genome key is provided
+    static void genome_exists(params, log) {
+        if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
+            log.error "=============================================================================\n" +
+                      "  Genome '${params.genome}' not found in any config files provided to the pipeline.\n" +
+                      "  Currently, the available genome keys are:\n" +
+                      "  ${params.genomes.keySet().join(", ")}\n" +
+                      "==================================================================================="
+            System.exit(1)
+        }
+    }
+
+    // Get attribute from genome config file e.g. fasta; returns '' when the genome or attribute is absent
+    static String get_genome_attribute(params, attribute) {
+        def val = ''
+        if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
+            if (params.genomes[ params.genome ].containsKey(attribute)) {
+                val = params.genomes[ params.genome ][ attribute ]
+            }
+        }
+        return val
+    }
+
+    // Print a single warning if genome fasta has more than one sequence (i.e. more than one '>' line)
+    static void is_multifasta(fasta, log) {
+        def count = 0
+        def line  = null
+        fasta.withReader { reader ->
+            // Compare against null explicitly: a blank line is falsy in Groovy and would end the scan early
+            while ((line = reader.readLine()) != null) {
+                if (line.contains('>')) {
+                    count++
+                    if (count > 1) {
+                        log.warn "=============================================================================\n" +
+                                "  This pipeline does not officially support multi-fasta genome files!\n\n" +
+                                "  The parameters and processes are tailored for viral genome analysis.\n" +
+                                "  Please amend the '--fasta' parameter.\n" +
+                                "==================================================================================="
+                        break
+                    }
+                }
+            }
+        }
+    }
+
+    // Parse the number of mapped reads from a flagstat file; returns [ mapped_reads, pass ]
+    static ArrayList get_flagstat_mapped_reads(workflow, params, log, flagstat) {
+        def mapped_reads = 0
+        flagstat.eachLine { line ->
+            if (line.contains(' mapped (')) {
+                mapped_reads = line.tokenize().first().toInteger()
+            }
+        }
+
+        def pass = false
+        def logname = flagstat.getBaseName() - 'flagstat'
+        Map colors = Headers.log_colours(params.monochrome_logs)
+        if (mapped_reads <= params.min_mapped_reads.toInteger()) {
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} [FAIL] Mapped read threshold >= ${params.min_mapped_reads}. IGNORING FOR FURTHER DOWNSTREAM ANALYSIS: ${mapped_reads} - $logname${colors.reset}."
+        } else {
+            pass = true
+        }
+        return [ mapped_reads, pass ]
+    }
+}
diff --git a/main.nf b/main.nf
@@ -26,9 +26,9 @@ if (params.help) {
 /* --        GENOME PARAMETER VALUES           -- */
 ////////////////////////////////////////////////////
 
-params.fasta         = Checks.get_genome_attribute(params, 'fasta')
-params.gff           = Checks.get_genome_attribute(params, 'gff')
-params.bowtie2_index = Checks.get_genome_attribute(params, 'bowtie2')
+params.fasta         = Workflow.get_genome_attribute(params, 'fasta')
+params.gff           = Workflow.get_genome_attribute(params, 'gff')
+params.bowtie2_index = Workflow.get_genome_attribute(params, 'bowtie2')
 
 ////////////////////////////////////////////////////
 /* --         PRINT PARAMETER SUMMARY          -- */

diff --git a/workflows/illumina.nf b/workflows/illumina.nf
@@ -8,55 +8,32 @@ params.summary_params = [:]
 /* --          VALIDATE INPUTS                 -- */
 ////////////////////////////////////////////////////
 
-// Check genome key exists if provided
-Checks.genome_exists(params, log)
+def valid_params = [
+    protocols   : ['metagenomic', 'amplicon'],
+    callers     : ['ivar', 'bcftools'],
+    assemblers  : ['spades', 'unicycler', 'minia'],
+    spades_modes: ['rnaviral', 'corona', 'metaviral', 'meta', 'metaplasmid', 'plasmid', 'isolate', 'rna', 'bio']
+]
+
+// Validate input parameters
+Workflow.validate_params(params, log, valid_params)
 
 // Check input path parameters to see if they exist
 def checkPathParamList = [
-    params.input, params.fasta, params.gff, 
-    params.bowtie2_index, params.primer_bed, params.primer_fasta,
-    params.multiqc_config
+    params.input, params.fasta, params.gff, params.bowtie2_index,
+    params.kraken2_db, params.primer_bed, params.primer_fasta,
+    params.blast_db, params.spades_hmm, params.multiqc_config
 ]
 for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }
 
 // Stage dummy file to be used as an optional input where required
 ch_dummy_file = file("$projectDir/assets/dummy_file.txt", checkIfExists: true)
 
-// Check mandatory parameters
-if (params.input)  { ch_input = file(params.input) } else { exit 1, 'Input samplesheet file not specified!' }
-if (!params.fasta) { exit 1, 'Genome fasta file not specified!' }
-
-def protocolList = ['metagenomic', 'amplicon']
-if (!protocolList.contains(params.protocol)) {
-    exit 1, "Invalid protocol option: ${params.protocol}. Valid options: ${protocolList.join(', ')}"
-}
-
-// Variant calling parameter validation
-def callerList = ['ivar', 'bcftools']
-def callers = params.callers ? params.callers.split(',').collect{ it.trim().toLowerCase() } : []
-if ((callerList + callers).unique().size() != callerList.size()) {
-    exit 1, "Invalid variant calller option: ${params.callers}. Valid options: ${callerList.join(', ')}"
-}
-
-if (params.protocol == 'amplicon' && !params.skip_variants && !params.primer_bed) {
-    exit 1, "To perform variant calling in 'amplicon' mode please provide a valid primer BED file!"
-}
+if (params.input)      { ch_input = file(params.input)           } else { exit 1, 'Input samplesheet file not specified!' }
+if (params.spades_hmm) { ch_spades_hmm = file(params.spades_hmm) } else { ch_spades_hmm = ch_dummy_file                   }
 
-// Assembly parameter validation
-def assemblerList = ['spades', 'unicycler', 'minia']
+def callers    = params.callers    ? params.callers.split(',').collect{ it.trim().toLowerCase() }    : []
 def assemblers = params.assemblers ? params.assemblers.split(',').collect{ it.trim().toLowerCase() } : []
-if ((assemblerList + assemblers).unique().size() != assemblerList.size()) {
-    exit 1, "Invalid assembler option: ${params.assemblers}. Valid options: ${assemblerList.join(', ')}"
-}
-
-def spadesModeList = ['rnaviral', 'corona', 'metaviral', 'meta', 'metaplasmid', 'plasmid', 'isolate', 'rna', 'bio']
-if (!spadesModeList.contains(params.spades_mode)) {
-    exit 1, "Invalid spades mode option: ${params.spades_mode}. Valid options: ${spadesModeList.join(', ')}"
-}
-if (params.spades_hmm) { ch_spades_hmm = file(params.spades_hmm) } else { ch_spades_hmm = ch_dummy_file }
-
-// if (!params.skip_kraken2 && !params.kraken2_db) {
-//     if (!params.kraken2_db_name) { exit 1, "Please specify a valid name to build Kraken2 database for host e.g. 'human'!" }
 
 ////////////////////////////////////////////////////
 /* --          CONFIG FILES                    -- */
@@ -189,7 +166,7 @@ workflow ILLUMINA {
     )
 
     // Check genome fasta only contains a single contig
-    Checks.is_multifasta(PREPARE_GENOME.out.fasta, log)
+    Workflow.is_multifasta(PREPARE_GENOME.out.fasta, log)
 
     /*
      * SUBWORKFLOW: Read in samplesheet, validate and stage input files
@@ -283,7 +260,7 @@ workflow ILLUMINA {
     ch_fail_mapping_multiqc = Channel.empty()
     if (!params.skip_variants) {
         ch_bowtie2_flagstat_multiqc
-            .map { meta, flagstat -> [ meta ] + Checks.get_flagstat_mapped_reads(workflow, params, log, flagstat) }
+            .map { meta, flagstat -> [ meta ] + Workflow.get_flagstat_mapped_reads(workflow, params, log, flagstat) }
             .set { ch_mapped_reads }
 
         ch_bam

diff --git a/workflows/sra_download.nf b/workflows/sra_download.nf
@@ -103,7 +103,7 @@ workflow SRA_DOWNLOAD {
 workflow.onComplete {
     Completion.email(workflow, params, params.summary_params, projectDir, log)
     Completion.summary(workflow, params, log)
-    Checks.sra_download(log)
+    Workflow.sra_download(log)
 }
 
 ////////////////////////////////////////////////////