Skip to content

Commit

Permalink
prokka2tsv
Browse files Browse the repository at this point in the history
  • Loading branch information
Danilo Di Leo committed Apr 8, 2024
1 parent ae70e71 commit 821f2bd
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 0 deletions.
8 changes: 8 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ process {
]
}

// PROKKAGFF2TSV: publish only the gzipped annotation table(s) matching the
// pattern below into the 'summary_tables' folder under the output directory.
withName: 'PROKKAGFF2TSV' {
    publishDir = [
        path:    { "${params.outdir}/summary_tables" },
        mode:    params.publish_dir_mode,
        pattern: "*.prokka-annotations.tsv.gz"
    ]
}

withName: 'GENOMEINDEX' {
publishDir = [
enabled: false
Expand Down
62 changes: 62 additions & 0 deletions modules/local/prokkagff2tsv.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
 * PROKKAGFF2TSV
 *
 * Converts a (possibly gzipped) GFF file into a wide, gzipped TSV table with
 * one row per GFF record and one column per attribute key found in the ninth
 * GFF column.
 *
 * Input:
 *   tuple val(meta), path(gff)  - meta map (meta.id is used as the default
 *                                 output prefix) and the GFF file to convert.
 * Output:
 *   tsv      - tuple val(meta), path("*.prokka-annotations.tsv.gz")
 *   versions - versions.yml listing R and the R packages loaded by the script.
 *
 * The output file name prefix can be overridden with task.ext.prefix.
 */
process PROKKAGFF2TSV {
    tag "$meta.id"
    label 'process_medium'

    conda "conda-forge::r-tidyverse=2.0.0 conda-forge::r-dtplyr=1.3.1 conda-forge::r-data.table=1.14.8"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mulled-v2-b2ec1fea5791d428eebb8c8ea7409c350d31dada:a447f6b7a6afde38352b24c30ae9cd6e39df95c4-1' :
        'biocontainers/mulled-v2-b2ec1fea5791d428eebb8c8ea7409c350d31dada:a447f6b7a6afde38352b24c30ae9cd6e39df95c4-1' }"

    input:
    tuple val(meta), path(gff)

    output:
    tuple val(meta), path("*.prokka-annotations.tsv.gz"), emit: tsv
    path "versions.yml"                                 , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def prefix = task.ext.prefix ?: "${meta.id}"

    """
    #!/usr/bin/env Rscript
    library(data.table)
    library(dtplyr)
    library(dplyr)
    library(tidyr)
    library(readr)
    library(stringr)

    # zgrep keeps only the lines that contain a tab, i.e. the nine-column
    # records (presumably dropping header and sequence lines - confirm
    # against the actual input).
    fread(
        cmd = "zgrep -P '\\t' $gff",
        col.names = c('contig', 'gene_caller', 'feature', 'start', 'end', 'a', 'strand', 'b', 'c')
    ) %>%
        # Give every record its own key so that pivot_wider() never merges two
        # records that happen to share all positional columns; merged records
        # would become list-columns, which write_tsv() cannot store.
        mutate(.gff_record = row_number()) %>%
        # One row per 'key=value' attribute ...
        separate_rows(c, sep = ';') %>%
        # ... split at the FIRST '=' only, so values containing '=' stay whole.
        separate(c, c('k', 'v'), sep = '=', extra = 'merge') %>%
        # Back to one row per record, one column per attribute key.
        pivot_wider(names_from = k, values_from = v) %>%
        select(-a, -b, -.gff_record) %>%
        rename(orf = ID) %>%
        rename_all(str_to_lower) %>%
        # Columns from the eighth onwards are put in alphabetical order; this
        # relies on 'orf' (the former ID attribute) being the seventh column.
        relocate(sort(colnames(.)[8:ncol(.)]), .after = 7) %>%
        relocate(orf) %>%
        as.data.table() %>%
        write_tsv("${prefix}.prokka-annotations.tsv.gz")

    # Record the versions of R and of every package loaded above.
    writeLines(
        c(
            "\\"${task.process}\\":",
            paste0(" R: ", paste0(R.Version()[c("major","minor")], collapse = ".")),
            paste0(" data.table: ", packageVersion("data.table")),
            paste0(" dtplyr: " , packageVersion("dtplyr")),
            paste0(" dplyr: " , packageVersion("dplyr")),
            paste0(" tidyr: " , packageVersion("tidyr")),
            paste0(" readr: " , packageVersion("readr")),
            paste0(" stringr: " , packageVersion("stringr"))
        ),
        "versions.yml"
    )
    """
}
5 changes: 5 additions & 0 deletions subworkflows/local/concatenate_gff.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ include { CAT_CAT as FIRST_CAT } from '../../modules/nf-core/cat/cat'
include { CAT_CAT as SECOND_CAT } from '../../modules/nf-core/cat/cat'
include { GENOMEINDEX } from '../../modules/local/genomeindex'
include { CAT_CAT as GINDEX_CAT } from '../../modules/nf-core/cat/cat'
include { PROKKAGFF2TSV } from '../../modules/local/prokkagff2tsv'

workflow CAT_GFFS {
take: ch_genome_gffs
Expand All @@ -31,8 +32,12 @@ workflow CAT_GFFS {
GINDEX_CAT(GENOMEINDEX.out.genomes2id.collect().map { [ [id: 'genomes_index'], it ] })
ch_versions = ch_versions.mix(GINDEX_CAT.out.versions)

PROKKAGFF2TSV(SECOND_CAT.out.file_out)
ch_versions = ch_versions.mix(PROKKAGFF2TSV.out.versions)

emit:
gff = SECOND_CAT.out.file_out
gindex = GINDEX_CAT.out.file_out
gfftsv = PROKKAGFF2TSV.out.tsv
versions = ch_versions
}

0 comments on commit 821f2bd

Please sign in to comment.