Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add crabs/dbimport from readsimulator pipeline #6584

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions modules/nf-core/crabs/dbimport/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Conda environment for the CRABS_DBIMPORT module.
# The crabs version pinned here should stay in sync with the
# container tags (crabs:1.0.7--pyhdfd78af_0) referenced in main.nf.
channels:
- conda-forge
- bioconda
dependencies:
# Channel-qualified, exact-version pin of the only tool this module runs.
- bioconda::crabs=1.0.7
59 changes: 59 additions & 0 deletions modules/nf-core/crabs/dbimport/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Imports an in-house / curated FASTA file into CRABS by running `crabs --import`.
//
// Inputs:
//   meta,  fasta            - sample map and the FASTA to import (plain or gzip-compressed)
//   meta2, accession2taxid  - file handed to `--acc2tax`
//   meta3, names            - file handed to `--names`
//   meta4, nodes            - file handed to `--nodes`
// Outputs:
//   fasta    - tuple of meta and the file(s) matching `*.fa` produced in the work dir
//   versions - versions.yml recording the crabs version parsed from `crabs --help`
process CRABS_DBIMPORT {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/crabs:1.0.7--pyhdfd78af_0':
        'biocontainers/crabs:1.0.7--pyhdfd78af_0' }"

    input:
    tuple val(meta), path(fasta)
    tuple val(meta2), path(accession2taxid)
    tuple val(meta3), path(names)
    tuple val(meta4), path(nodes)

    output:
    tuple val(meta), path("*.fa"), emit: fasta
    path "versions.yml"          , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def is_compressed = fasta.name.endsWith(".gz")
    // Strip only a trailing ".gz"; a plain replace() would also mangle names
    // that merely contain ".gz" somewhere in the middle.
    def fasta_name = fasta.name.replaceAll('\\.gz$', '')
    """
    if [ "${is_compressed}" == "true" ]; then
        gzip -c -d ${fasta} > ${fasta_name}
    fi

    crabs --import \\
        --input ${fasta_name} \\
        --output ${prefix}.crabsdb.fa \\
        --acc2tax ${accession2taxid} \\
        --names ${names} \\
        --nodes ${nodes} \\
        $args

    # Only remove the temporary decompressed copy: it would otherwise be
    # captured by the "*.fa" output glob. An uncompressed input is the staged
    # input file itself and must not be deleted.
    if [ "${is_compressed}" == "true" ]; then
        rm ${fasta_name}
    fi

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        crabs: \$(crabs --help | grep 'CRABS |' | sed 's/.*CRABS | \\(v[0-9.]*\\).*/\\1/')
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // NOTE(review): the stub creates "${prefix}.fa" whereas the real script
    // creates "${prefix}.crabsdb.fa". Aligning the two requires regenerating
    // tests/main.nf.test.snap, so the name is left as-is here.
    """
    touch ${prefix}.fa

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        crabs: \$(crabs --help | grep 'CRABS |' | sed 's/.*CRABS | \\(v[0-9.]*\\).*/\\1/')
    END_VERSIONS
    """
}
52 changes: 52 additions & 0 deletions modules/nf-core/crabs/dbimport/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Module metadata for CRABS_DBIMPORT.
# Every input/output channel declared in main.nf is described here, in the
# same order as in the process definition.
name: crabs_dbimport
description: In-house generated or curated data can be imported into CRABS.
keywords:
  - insilico
  - amplicon
  - sequencing
  - inhouse
tools:
  - crabs:
      description: |
        Crabs (Creating Reference databases for Amplicon-Based Sequencing)
        is a program to download and curate reference databases
        for eDNA metabarcoding analyses
      homepage: https://github.com/gjeunen/reference_database_creator
      documentation: https://github.com/gjeunen/reference_database_creator?tab=readme-ov-file#running-crabs
      tool_dev_url: https://github.com/gjeunen/reference_database_creator
      doi: 10.1111/1755-0998.13741
      licence: ["MIT License"]
      identifier: ""

input:
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. `[ id:'sample1' ]`
    - fasta:
        type: file
        description: |
          In-house sequencing data in FASTA format.
          May be gzip-compressed; the module decompresses it before import.
        pattern: "*.{fa,fasta,fa.gz,fasta.gz}"
  - - meta2:
        type: map
        description: |
          Groovy Map containing information about the accession2taxid file
          e.g. `[ id:'accession2taxid' ]`
    - accession2taxid:
        type: file
        description: Accession-to-taxid mapping file passed to `crabs --acc2tax`
        pattern: "*.accession2taxid"
  - - meta3:
        type: map
        description: |
          Groovy Map containing information about the names file
          e.g. `[ id:'names' ]`
    - names:
        type: file
        description: Taxonomy names file passed to `crabs --names`
        pattern: "*.dmp"
  - - meta4:
        type: map
        description: |
          Groovy Map containing information about the nodes file
          e.g. `[ id:'nodes' ]`
    - nodes:
        type: file
        description: Taxonomy nodes file passed to `crabs --nodes`
        pattern: "*.dmp"
output:
  - fasta:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1' ]`
      - "*.fa":
          type: file
          description: File written by `crabs --import` containing the imported sequences
          pattern: "*.fa"
  - versions:
      - versions.yml:
          type: file
          description: File containing software versions
          pattern: "versions.yml"
authors:
  - "@famosab"
  - "@a4000"
maintainers:
  - "@famosab"
  - "@a4000"
90 changes: 90 additions & 0 deletions modules/nf-core/crabs/dbimport/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// nf-test suite for the CRABS_DBIMPORT process.
// Both tests feed the same four input channels: the FASTA to import plus the
// accession2taxid, names and nodes taxonomy files.
nextflow_process {

    name "Test Process CRABS_DBIMPORT"
    script "../main.nf"
    process "CRABS_DBIMPORT"

    tag "modules"
    tag "modules_nfcore"
    tag "crabs"
    tag "crabs/dbimport"

    // Real run: extra CLI options reach the process through ext.args,
    // which tests/nextflow.config wires to params.module_args.
    test("sarscov2 - fasta") {

        config "./nextflow.config"

        when {
            params {
                // No trailing line-continuation backslash here: the value is
                // appended as the last line of the crabs command, so a dangling
                // "\" would splice the following script line onto it.
                module_args = "--import-format embl --ranks 'superkingdom;phylum;class;order;family;genus;species'"
            }
            process {
                """
                input[0] = [
                    [ id:'test' ], // meta map
                    file(params.modules_testdata_base_path +'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
                ]
                input[1] = [
                    [ id:'accession2taxid' ], // meta map
                    file(params.modules_testdata_base_path +'genomics/prokaryotes/metagenome/taxonomy/accession2taxid/nucl_gb.accession2taxid', checkIfExists: true)
                ]
                input[2] = [
                    [ id:'names' ], // meta map
                    file(params.modules_testdata_base_path +'genomics/prokaryotes/metagenome/taxonomy/taxdmp/names.dmp', checkIfExists: true)
                ]
                input[3] = [
                    [ id:'nodes' ], // meta map
                    file(params.modules_testdata_base_path +'genomics/prokaryotes/metagenome/taxonomy/taxdmp/nodes.dmp', checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

    // Stub run: only checks that the stub block emits the declared outputs
    // and that versions.yml parses to the expected content.
    test("sarscov2 - fasta - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [
                    [ id:'test' ], // meta map
                    file(params.modules_testdata_base_path +'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
                ]
                input[1] = [
                    [ id:'accession2taxid' ], // meta map
                    file(params.modules_testdata_base_path +'genomics/prokaryotes/metagenome/taxonomy/accession2taxid/nucl_gb.accession2taxid', checkIfExists: true)
                ]
                input[2] = [
                    [ id:'names' ], // meta map
                    file(params.modules_testdata_base_path +'genomics/prokaryotes/metagenome/taxonomy/taxdmp/names.dmp', checkIfExists: true)
                ]
                input[3] = [
                    [ id:'nodes' ], // meta map
                    file(params.modules_testdata_base_path +'genomics/prokaryotes/metagenome/taxonomy/taxdmp/nodes.dmp', checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(
                    process.out,
                    path(process.out.versions[0]).yaml
                    ).match() }
            )
        }

    }

}
63 changes: 63 additions & 0 deletions modules/nf-core/crabs/dbimport/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
{
"sarscov2 - fasta - stub": {
"content": [
{
"0": [
[
{
"id": "test"
},
"test.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [
"versions.yml:md5,65a68b87678624e13a185da907b3be67"
],
"fasta": [
[
{
"id": "test"
},
"test.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
"versions.yml:md5,65a68b87678624e13a185da907b3be67"
]
},
{
"CRABS_DBIMPORT": {
"crabs": "v1.0.7"
}
}
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.4"
},
"timestamp": "2025-02-03T13:52:33.256766"
},
"sarscov2 - fasta": {
"content": [
{
"0": [

],
"1": [

],
"fasta": [

],
"versions": [

]
}
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.4"
},
"timestamp": "2025-02-03T14:15:02.785883"
}
}
7 changes: 7 additions & 0 deletions modules/nf-core/crabs/dbimport/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Test-only configuration: forwards the nf-test parameter `module_args`
// to the module as its extra command-line arguments.
process {
    withName: 'CRABS_DBIMPORT' {
        // Read by main.nf as task.ext.args.
        ext.args = params.module_args
    }
}
Loading