Skip to content

Commit

Permalink
Groovy babyyyyy
Browse files Browse the repository at this point in the history
  • Loading branch information
drpatelh committed Feb 12, 2021
1 parent 532ad3d commit 6b70a9b
Show file tree
Hide file tree
Showing 6 changed files with 164 additions and 133 deletions.
87 changes: 0 additions & 87 deletions lib/Checks.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -63,92 +63,5 @@ class Checks {
}
}
}

// Citation string
// Build the citation notice for the pipeline.
//   workflow: object whose 'manifest.name' is interpolated into the first line
//             and into the CITATIONS.md URL.
// Returns the complete multi-line notice as a single String.
private static String citation(workflow) {
    // Append the fragments in display order instead of chaining '+'.
    def notice = new StringBuilder()
    notice.append("If you use ${workflow.manifest.name} for your analysis please cite:\n\n")
    notice.append("* The pipeline\n")
    notice.append(" https://doi.org/10.5281/zenodo.1400710\n\n")
    notice.append("* The nf-core framework\n")
    notice.append(" https://doi.org/10.1038/s41587-020-0439-x\n\n")
    notice.append("* Software dependencies\n")
    notice.append(" https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md")
    return notice.toString()
}

// Print a warning after SRA download has completed
// Emit a single warning telling the user that the auto-created samplesheet
// is experimental and should be checked by hand.
//   log: logger object; only its warn() method is used.
static void sra_download(log) {
    // Build the banner first, then hand it to the logger in one call.
    def banner = new StringBuilder()
    banner.append("=============================================================================\n")
    banner.append(" THIS IS AN EXPERIMENTAL FEATURE!\n\n")
    banner.append(" Please double-check the samplesheet that has been auto-created using the\n")
    banner.append(" public database ids provided via the '--public_data_ids' parameter.\n\n")
    banner.append(" All of the sample metadata obtained from the ENA has been appended\n")
    banner.append(" as additional columns to help you manually curate the samplesheet before\n")
    banner.append(" you run the main branch of the pipeline.\n")
    banner.append("===================================================================================")
    log.warn(banner.toString())
}

// Exit pipeline if incorrect --genome key provided
// Stop the run when '--genome' names a key that is not present in params.genomes.
//   params: pipeline parameters; 'genomes' (map of genome key -> attributes) and
//           'genome' (the selected key) are read.
//   log   : logger object; only its error() method is used.
// Nothing happens when either params.genomes or params.genome is unset.
static void genome_exists(params, log) {
    if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
        log.error "=============================================================================\n" +
            " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" +
            " Currently, the available genome keys are:\n" +
            " ${params.genomes.keySet().join(", ")}\n" +
            "==================================================================================="
        // This is a failure path: exit non-zero so callers and schedulers see the error.
        System.exit(1)
    }
}

// Get attribute from genome config file e.g. fasta
// Look up one attribute (e.g. 'fasta') of the genome selected by params.genome.
//   params   : pipeline parameters; 'genomes' and 'genome' are read.
//   attribute: key to fetch from the selected genome's entry.
// Returns the stored value, or '' when the genome map, the genome key or the
// attribute is missing.
static String get_genome_attribute(params, attribute) {
    def genome_is_known = params.genomes && params.genome && params.genomes.containsKey(params.genome)
    if (!genome_is_known) {
        return ''
    }
    def genome_config = params.genomes[ params.genome ]
    return genome_config.containsKey(attribute) ? genome_config[ attribute ] : ''
}

// Print warning if genome fasta has more than one sequence
// Warn once if the genome fasta holds more than one sequence record.
//   fasta: object providing withReader { reader -> ... } (e.g. a File).
//   log  : logger object; only its warn() method is used.
// Reading stops as soon as the second header has been seen.
static void is_multifasta(fasta, log) {
    def count = 0
    def line = null
    fasta.withReader { reader ->
        // Compare against null explicitly: an empty string is falsy in Groovy, so
        // a bare assignment in the condition would end the scan at the first blank
        // line and miss any records that follow it.
        while ((line = reader.readLine()) != null) {
            // A record header is a line that begins with '>'.
            if (line.startsWith('>')) {
                count++
                if (count > 1) {
                    log.warn "=============================================================================\n" +
                        " This pipeline does not officially support multi-fasta genome files!\n\n" +
                        " The parameters and processes are tailored for viral genome analysis.\n" +
                        " Please amend the '--fasta' parameter.\n" +
                        "==================================================================================="
                    break
                }
            }
        }
    }
}

// Function that parses and returns the number of mapped reads from flagstat files
// Read the mapped-read count from a flagstat file and compare it with
// params.min_mapped_reads.
//   workflow: object whose manifest.name is used in the log message.
//   params  : pipeline parameters; 'monochrome_logs' and 'min_mapped_reads' are read.
//   log     : logger object; only its info() method is used.
//   flagstat: file-like object providing eachLine {} and getBaseName().
// Returns [ mapped_reads, pass ], where pass is true only when mapped_reads is
// strictly greater than the threshold.
static ArrayList get_flagstat_mapped_reads(workflow, params, log, flagstat) {
    // The first token of the last line containing ' mapped (' wins; 0 if no line matches.
    def mapped_reads = 0
    flagstat.eachLine { entry ->
        if (entry.contains(' mapped (')) {
            mapped_reads = entry.tokenize().first().toInteger()
        }
    }

    def logname = flagstat.getBaseName() - 'flagstat'
    Map colors = Headers.log_colours(params.monochrome_logs)
    def pass = mapped_reads > params.min_mapped_reads.toInteger()
    if (!pass) {
        log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} [FAIL] Mapped read threshold >= ${params.min_mapped_reads}. IGNORING FOR FURTHER DOWNSTREAM ANALYSIS: ${mapped_reads} - $logname${colors.reset}."
    }
    // Passing samples are not logged; the corresponding call is kept disabled:
    //log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} [PASS] Mapped read threshold >=${params.min_mapped_reads}: ${mapped_reads} - $logname${colors.reset}."
    return [ mapped_reads, pass ]
}
}
4 changes: 2 additions & 2 deletions lib/Schema.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ class Schema {
output += "\n"
}
output += Headers.dashed_line(params.monochrome_logs)
output += "\n\n" + Checks.citation(workflow)
output += "\n\n" + Workflow.citation(workflow)
output += "\n\n" + Headers.dashed_line(params.monochrome_logs)
return output
}
Expand Down Expand Up @@ -197,7 +197,7 @@ class Schema {
}
}
output += Headers.dashed_line(params.monochrome_logs)
output += "\n\n" + Checks.citation(workflow)
output += "\n\n" + Workflow.citation(workflow)
output += "\n\n" + Headers.dashed_line(params.monochrome_logs)
return output
}
Expand Down
141 changes: 141 additions & 0 deletions lib/Workflow.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
/*
* This file holds several functions used to perform standard checks for the nf-core/viralrecon pipeline.
*/

class Workflow {

    // Build the citation notice for the pipeline.
    //   workflow: object whose 'manifest.name' is interpolated into the first line
    //             and into the CITATIONS.md URL.
    // Not declared 'private' because the Schema class calls Workflow.citation(workflow).
    static String citation(workflow) {
        return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" +
            "* The pipeline\n" +
            " https://doi.org/10.5281/zenodo.1400710\n\n" +
            "* The nf-core framework\n" +
            " https://doi.org/10.1038/s41587-020-0439-x\n\n" +
            "* Software dependencies\n" +
            " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md"
    }

    // Validate the pipeline parameters; on the first failed check an error is
    // logged and the JVM exits with a non-zero status.
    //   params      : pipeline parameters.
    //   log         : logger object; only its error() method is used.
    //   valid_params: map with the keys 'protocols', 'callers', 'assemblers' and
    //                 'spades_modes', each holding the list of accepted values.
    static void validate_params(params, log, valid_params) {
        genome_exists(params, log)

        // Generic parameter validation
        if (!params.fasta) {
            abort(log, "Genome fasta file not specified!")
        }

        // A database name is only required when Kraken2 runs and no database was supplied.
        if (!params.skip_kraken2 && !params.kraken2_db && !params.kraken2_db_name) {
            abort(log, "Please specify a valid name to build Kraken2 database for host e.g. 'human'!")
        }

        if (!valid_params['protocols'].contains(params.protocol)) {
            abort(log, "Invalid protocol option: ${params.protocol}. Valid options: ${valid_params['protocols'].join(', ')}")
        }

        // Variant calling parameter validation
        def callers = parse_csv_option(params.callers)
        if (!valid_params['callers'].containsAll(callers)) {
            abort(log, "Invalid variant caller option: ${params.callers}. Valid options: ${valid_params['callers'].join(', ')}")
        }

        if (params.protocol == 'amplicon' && !params.skip_variants && !params.primer_bed) {
            abort(log, "To perform variant calling in 'amplicon' mode please provide a valid primer BED file!")
        }

        // Assembly parameter validation
        def assemblers = parse_csv_option(params.assemblers)
        if (!valid_params['assemblers'].containsAll(assemblers)) {
            abort(log, "Invalid assembler option: ${params.assemblers}. Valid options: ${valid_params['assemblers'].join(', ')}")
        }

        if (!valid_params['spades_modes'].contains(params.spades_mode)) {
            abort(log, "Invalid spades mode option: ${params.spades_mode}. Valid options: ${valid_params['spades_modes'].join(', ')}")
        }
    }

    // Split a comma-separated option into trimmed, lower-cased entries ([] when unset).
    private static List parse_csv_option(value) {
        return value ? value.split(',').collect{ it.trim().toLowerCase() } : []
    }

    // Log an error and terminate with exit status 1 so the failure is visible to callers.
    private static void abort(log, message) {
        log.error message
        System.exit(1)
    }

    // Print a warning after SRA download has completed
    //   log: logger object; only its warn() method is used.
    static void sra_download(log) {
        log.warn "=============================================================================\n" +
            " THIS IS AN EXPERIMENTAL FEATURE!\n\n" +
            " Please double-check the samplesheet that has been auto-created using the\n" +
            " public database ids provided via the '--public_data_ids' parameter.\n\n" +
            " All of the sample metadata obtained from the ENA has been appended\n" +
            " as additional columns to help you manually curate the samplesheet before\n" +
            " you run the main branch of the pipeline.\n" +
            "==================================================================================="
    }

    // Exit pipeline if incorrect --genome key provided
    //   params: pipeline parameters; 'genomes' and 'genome' are read.
    //   log   : logger object; only its error() method is used.
    // Nothing happens when either params.genomes or params.genome is unset.
    static void genome_exists(params, log) {
        if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
            abort(log, "=============================================================================\n" +
                " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" +
                " Currently, the available genome keys are:\n" +
                " ${params.genomes.keySet().join(", ")}\n" +
                "===================================================================================")
        }
    }

    // Get attribute from genome config file e.g. fasta
    // Returns '' when the genome map, the selected genome or the attribute is missing.
    static String get_genome_attribute(params, attribute) {
        def val = ''
        if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
            if (params.genomes[ params.genome ].containsKey(attribute)) {
                val = params.genomes[ params.genome ][ attribute ]
            }
        }
        return val
    }

    // Print warning if genome fasta has more than one sequence
    //   fasta: object providing withReader { reader -> ... } (e.g. a File).
    //   log  : logger object; only its warn() method is used.
    static void is_multifasta(fasta, log) {
        def count = 0
        def line = null
        fasta.withReader { reader ->
            // Compare against null explicitly: an empty string is falsy in Groovy, so
            // a bare assignment in the condition would end the scan at the first blank
            // line and miss any records that follow it.
            while ((line = reader.readLine()) != null) {
                // A record header is a line that begins with '>'.
                if (line.startsWith('>')) {
                    count++
                    if (count > 1) {
                        log.warn "=============================================================================\n" +
                            " This pipeline does not officially support multi-fasta genome files!\n\n" +
                            " The parameters and processes are tailored for viral genome analysis.\n" +
                            " Please amend the '--fasta' parameter.\n" +
                            "==================================================================================="
                        break
                    }
                }
            }
        }
    }

    // Function that parses and returns the number of mapped reads from flagstat files
    // Returns [ mapped_reads, pass ]; pass is true only when mapped_reads is strictly
    // greater than params.min_mapped_reads.
    static ArrayList get_flagstat_mapped_reads(workflow, params, log, flagstat) {
        // The first token of the last line containing ' mapped (' wins; 0 if no line matches.
        def mapped_reads = 0
        flagstat.eachLine { line ->
            if (line.contains(' mapped (')) {
                mapped_reads = line.tokenize().first().toInteger()
            }
        }

        def pass = false
        def logname = flagstat.getBaseName() - 'flagstat'
        Map colors = Headers.log_colours(params.monochrome_logs)
        // NOTE(review): a sample with exactly min_mapped_reads is failed here ('<=')
        // although the message describes the threshold as '>=' - confirm the intended boundary.
        if (mapped_reads <= params.min_mapped_reads.toInteger()) {
            log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} [FAIL] Mapped read threshold >= ${params.min_mapped_reads}. IGNORING FOR FURTHER DOWNSTREAM ANALYSIS: ${mapped_reads} - $logname${colors.reset}."
        } else {
            pass = true
            //log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} [PASS] Mapped read threshold >=${params.min_mapped_reads}: ${mapped_reads} - $logname${colors.reset}."
        }
        return [ mapped_reads, pass ]
    }

}
6 changes: 3 additions & 3 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ if (params.help) {
/* -- GENOME PARAMETER VALUES -- */
////////////////////////////////////////////////////

// Fill the reference parameters from the genome entry selected by params.genome.
// get_genome_attribute returns '' when the genome or the attribute is missing.
// NOTE(review): the three Checks.* assignments and the three Workflow.* assignments
// look like the removed and added sides of the same diff hunk - confirm only the
// Workflow.* set is kept, since this commit deletes get_genome_attribute from Checks.
params.fasta = Checks.get_genome_attribute(params, 'fasta')
params.gff = Checks.get_genome_attribute(params, 'gff')
params.bowtie2_index = Checks.get_genome_attribute(params, 'bowtie2')
params.fasta = Workflow.get_genome_attribute(params, 'fasta')
params.gff = Workflow.get_genome_attribute(params, 'gff')
params.bowtie2_index = Workflow.get_genome_attribute(params, 'bowtie2')

////////////////////////////////////////////////////
/* -- PRINT PARAMETER SUMMARY -- */
Expand Down
57 changes: 17 additions & 40 deletions workflows/illumina.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,55 +8,32 @@ params.summary_params = [:]
/* -- VALIDATE INPUTS -- */
////////////////////////////////////////////////////

// Check genome key exists if provided
// NOTE(review): genome_exists is deleted from lib/Checks.groovy in this commit and
// Workflow.validate_params calls genome_exists itself, so this call looks like the
// removed side of the diff - confirm it is gone from the final file.
Checks.genome_exists(params, log)
// Accepted values for the enumerated parameters. The keys are the ones that
// Workflow.validate_params looks up: 'protocols', 'callers', 'assemblers', 'spades_modes'.
def valid_params = [
protocols : ['metagenomic', 'amplicon'],
callers : ['ivar', 'bcftools'],
assemblers : ['spades', 'unicycler', 'minia'],
spades_modes: ['rnaviral', 'corona', 'metaviral', 'meta', 'metaplasmid', 'plasmid', 'isolate', 'rna', 'bio']
]

// Validate input parameters
// Logs an error and exits on the first invalid parameter.
Workflow.validate_params(params, log, valid_params)

// Check input path parameters to see if they exist
// NOTE(review): the first three entry lines and the last three entry lines look like
// the removed and added sides of the diff; there is no comma between them, so the
// list is only valid Groovy once one of the two sets is dropped.
def checkPathParamList = [
params.input, params.fasta, params.gff,
params.bowtie2_index, params.primer_bed, params.primer_fasta,
params.multiqc_config
params.input, params.fasta, params.gff, params.bowtie2_index,
params.kraken2_db, params.primer_bed, params.primer_fasta,
params.blast_db, params.spades_hmm, params.multiqc_config
]
// Only parameters that are set are passed to file(..., checkIfExists: true).
for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }

// Stage dummy file to be used as an optional input where required
ch_dummy_file = file("$projectDir/assets/dummy_file.txt", checkIfExists: true)

// Check mandatory parameters
// NOTE(review): this hunk interleaves removed and added diff lines (the ch_input,
// ch_spades_hmm and 'def callers' statements each appear twice). The inline
// protocol / caller / assembler / spades-mode checks below duplicate what
// Workflow.validate_params does - confirm which lines survive in the final file.
if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet file not specified!' }
if (!params.fasta) { exit 1, 'Genome fasta file not specified!' }

def protocolList = ['metagenomic', 'amplicon']
if (!protocolList.contains(params.protocol)) {
exit 1, "Invalid protocol option: ${params.protocol}. Valid options: ${protocolList.join(', ')}"
}

// Variant calling parameter validation
def callerList = ['ivar', 'bcftools']
// Comma-separated option split into trimmed, lower-cased names ([] when unset).
def callers = params.callers ? params.callers.split(',').collect{ it.trim().toLowerCase() } : []
// The union grows beyond callerList only when an unknown caller was requested.
if ((callerList + callers).unique().size() != callerList.size()) {
exit 1, "Invalid variant calller option: ${params.callers}. Valid options: ${callerList.join(', ')}"
}

if (params.protocol == 'amplicon' && !params.skip_variants && !params.primer_bed) {
exit 1, "To perform variant calling in 'amplicon' mode please provide a valid primer BED file!"
}
if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet file not specified!' }
// Fall back to the staged dummy file when no SPAdes HMM file is given.
if (params.spades_hmm) { ch_spades_hmm = file(params.spades_hmm) } else { ch_spades_hmm = ch_dummy_file }

// Assembly parameter validation
def assemblerList = ['spades', 'unicycler', 'minia']
def callers = params.callers ? params.callers.split(',').collect{ it.trim().toLowerCase() } : []
def assemblers = params.assemblers ? params.assemblers.split(',').collect{ it.trim().toLowerCase() } : []
// Same subset test as for the callers above.
if ((assemblerList + assemblers).unique().size() != assemblerList.size()) {
exit 1, "Invalid assembler option: ${params.assemblers}. Valid options: ${assemblerList.join(', ')}"
}

def spadesModeList = ['rnaviral', 'corona', 'metaviral', 'meta', 'metaplasmid', 'plasmid', 'isolate', 'rna', 'bio']
if (!spadesModeList.contains(params.spades_mode)) {
exit 1, "Invalid spades mode option: ${params.spades_mode}. Valid options: ${spadesModeList.join(', ')}"
}
if (params.spades_hmm) { ch_spades_hmm = file(params.spades_hmm) } else { ch_spades_hmm = ch_dummy_file }

// Disabled Kraken2 database-name check; Workflow.validate_params performs this check.
// if (!params.skip_kraken2 && !params.kraken2_db) {
// if (!params.kraken2_db_name) { exit 1, "Please specify a valid name to build Kraken2 database for host e.g. 'human'!" }

////////////////////////////////////////////////////
/* -- CONFIG FILES -- */
Expand Down Expand Up @@ -189,7 +166,7 @@ workflow ILLUMINA {
)

// Check genome fasta only contains a single contig
Checks.is_multifasta(PREPARE_GENOME.out.fasta, log)
Workflow.is_multifasta(PREPARE_GENOME.out.fasta, log)

/*
* SUBWORKFLOW: Read in samplesheet, validate and stage input files
Expand Down Expand Up @@ -283,7 +260,7 @@ workflow ILLUMINA {
ch_fail_mapping_multiqc = Channel.empty()
if (!params.skip_variants) {
ch_bowtie2_flagstat_multiqc
.map { meta, flagstat -> [ meta ] + Checks.get_flagstat_mapped_reads(workflow, params, log, flagstat) }
.map { meta, flagstat -> [ meta ] + Workflow.get_flagstat_mapped_reads(workflow, params, log, flagstat) }
.set { ch_mapped_reads }

ch_bam
Expand Down
2 changes: 1 addition & 1 deletion workflows/sra_download.nf
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ workflow SRA_DOWNLOAD {
// Completion handler: send the completion e-mail, print the run summary, then
// print the SRA-download warning.
workflow.onComplete {
    Completion.email(workflow, params, params.summary_params, projectDir, log)
    Completion.summary(workflow, params, log)
    // NOTE(review): removed side of the diff - sra_download is deleted from
    // lib/Checks.groovy in this commit; confirm this line is gone from the final file.
    Checks.sra_download(log)
    // sra_download now lives in the Workflow class; none of the files changed in
    // this commit defines a 'Viralrecon' class.
    Workflow.sra_download(log)
}

////////////////////////////////////////////////////
Expand Down

0 comments on commit 6b70a9b

Please sign in to comment.