Skip to content

Commit

Permalink
new logic
Browse files Browse the repository at this point in the history
  • Loading branch information
Danilo Di Leo committed Feb 21, 2025
1 parent b4fbb7b commit 9bad327
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 4 deletions.
6 changes: 5 additions & 1 deletion modules/local/check_duplicates.nf
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
process CHECK_DUPLICATES {
label 'process_low'
tag "${meta.id}"

conda "conda-forge::pigz=2.6"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/ubuntu:24.04' :
'biocontainers/ubuntu:24.04' }"

input:
path fnas
tuple val(meta), path(fna)

output:
path "duplicates.txt" , emit: duplicates_file, optional: true
tuple val(meta), path(fna) , emit: fna, optional: true
path "versions.yml" , emit: versions

script:
prefix = task.ext.prefix ?: meta.id

"""
# Find duplicate contig names across all files
zgrep -H '>' *.fna.gz | sed 's/^[^:]*://' | sort | uniq -d > duplicate_contig_names.txt
Expand Down
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ params {
checkm_metadata = null
gtdbtk_metadata = null
se_reads = false
check_duplicates = true
rename_contigs = false

// Trimming
Expand Down
5 changes: 5 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@
"description": "Activate when you work with single-end reads.",
"fa_icon": "fas fa-arrows-alt-h"
},
"check_duplicates": {
"type": "boolean",
"default": true,
"description": "Active by default. If duplicate contig names are found in your contig files, the pipeline stops with an error and suggestions for how to fix them."
},
"rename_contigs": {
"type": "boolean",
"description": "Activate when you want to rename contigs. Useful when you run magmap after nf-core/mag, or in any other case where there is a risk that contigs in different genomes share the same name."
Expand Down
19 changes: 16 additions & 3 deletions workflows/magmap.nf
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,23 @@ workflow MAGMAP {
//
// Check for duplicate contig names in the local genome collection
//
CHECK_DUPLICATES(ch_genomeinfo.map{ it.genome_fna }.collect())
ch_versions = ch_versions.mix(CHECK_DUPLICATES.out.versions)
if (params.check_duplicates) {
    // Collect every genome FASTA path into a single list and pair it with a
    // meta map, so the process receives one `[ meta, files ]` tuple.
    // NOTE(review): the meta id 'test' looks like a placeholder; the process
    // uses meta.id for its tag and prefix, so confirm before changing it.
    CHECK_DUPLICATES(ch_genomeinfo.map{ it.genome_fna }.collect().map { [ [id: 'test'], it ] } )
    ch_versions = ch_versions.mix(CHECK_DUPLICATES.out.versions)

    // Abort the run when the duplicates report contains at least one line.
    // `duplicates_file` is declared as an optional output, so this chain only
    // fires when the process actually produced the file.
    CHECK_DUPLICATES.out.duplicates_file
        .countLines()
        .map { it.toInteger() }
        .subscribe { count ->
            if (count > 0) {
                // The message must name the real parameter (`rename_contigs`),
                // which is the one declared in the config and schema.
                error """Your genomes have duplicate contig names. Either set --rename_contigs and resume the pipeline to let the pipeline fix this, or fix the genome files manually and resume the pipeline. NB! deactivate params.check_duplicates and resume the pipeline."""
            }
        }
}

if( params.rename_contigs ) {
// RENAME_CONTIGS(rename_input)
if( params.rename_contigs ) {
RENAME_CONTIGS( ch_genomeinfo.map{ [ it.accno, it.genome_fna ] } )
ch_versions = ch_versions.mix(RENAME_CONTIGS.out.versions)

Expand Down

0 comments on commit 9bad327

Please sign in to comment.