Merge pull request #68 from itrujnara/dev
Add MERGE_IDS subworkflow
itrujnara authored Nov 15, 2024
2 parents 05bf32b + a522779 commit 2732fb0
Showing 44 changed files with 985 additions and 34 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -1,7 +1,7 @@
<h1>
<picture>
<source media="(prefers-color-scheme: dark)" srcset="docs/images/nf-core-reportho_logo_dark.png">
<img alt="nf-core/reportho" src="docs/images/nf-core-reportho_logo_light.png">
<source media="(prefers-color-scheme: dark)" srcset="docs/images/nf-core-reportho_logo_hex_dark.png">
<img alt="nf-core/reportho" src="docs/images/nf-core-reportho_logo_hex_light.png">
</picture>
</h1>

Binary file added assets/nf-core-reportho_logo_dark.png
117 changes: 116 additions & 1 deletion conf/modules.config
@@ -77,7 +77,43 @@ process {
maxRetries = 3
}

withName: 'CONCAT_FASTA' {
withName: 'SPLIT_ID_FORMAT' {
publishDir = [
path: { "${params.outdir}/sequences" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
}

withName: 'FETCH_UNIPROT_SEQUENCES' {
publishDir = [
path: { "${params.outdir}/sequences" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
}

withName: 'FETCH_ENSEMBL_IDMAP' {
publishDir = [
path: { "${params.outdir}/sequences" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
}

withName: 'FETCH_ENSEMBL_SEQUENCES' {
publishDir = [
path: { "${params.outdir}/sequences" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
}

withName: 'FETCH_REFSEQ_SEQUENCES' {
publishDir = [
path: { "${params.outdir}/sequences" },
mode: params.publish_dir_mode,
@@ -86,6 +122,23 @@
]
}

withName: 'FETCH_OMA_SEQUENCES' {
publishDir = [
path: { "${params.outdir}/sequences" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
}

withName: 'CONCAT_FASTA' {
publishDir = [
path: { "${params.outdir}/sequences" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'CONCAT_HITS' {
ext.prefix = {"${meta.id}_hits.txt"}
publishDir = [
@@ -106,6 +159,68 @@
]
}

withName: 'MERGE_FASTA_IDS' {
ext.args2 = "\'/^>/ { split(\$0, arr, \"|\"); print substr(arr[1], 2) }\'"
ext.prefix = {"${meta.id}_ids"}
ext.suffix = "txt"
publishDir = [
path: { "${params.outdir}/merge" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
}
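
For orientation, a minimal sketch of what the ext.args2 program above does, assuming a hypothetical header layout of >ACCESSION|annotation (the accessions and annotations below are invented):

# print the text between ">" and the first "|" of each header line
printf '>P05067|oma\nMLPGLALLLL\n>Q9VHA0|refseq\nMSDNEQ\n' \
  | awk '/^>/ { split($0, arr, "|"); print substr(arr[1], 2) }'
# expected output:
# P05067
# Q9VHA0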

withName: 'DIAMOND_CLUSTER' {
ext.args = "--approx-id 90"
ext.prefix = { "${meta.id}_${db.toString().tokenize(".")[0].tokenize("_")[-1]}" }
publishDir = [
path: { "${params.outdir}/merge" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
}

withName: 'POSTPROCESS_DIAMOND' {
ext.args = "-F\'\\t\'"
ext.args2 = """\'{
split(\$1, col1, "|");
split(\$2, col2, "|");
if (col1[1] != col2[1]) {
print col1[1] "\\t" col2[1];
} else {
print col1[1];
}
}\'"""
ext.prefix = { "${meta.id}_ids_diamond" }
ext.suffix = "txt"
publishDir = [
path: { "${params.outdir}/merge" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
}
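
A minimal sketch of this post-processing step, run on invented DIAMOND cluster lines of the assumed form representative<TAB>member with |-suffixed IDs: self-pairs collapse to a single accession, mixed pairs become a tab-separated accession pair.

# two sample lines: one mixed pair, one self-pair (IDs are invented)
printf 'P05067|a\tQ12345|b\nP05067|a\tP05067|a\n' \
  | awk -F'\t' '{ split($1, col1, "|"); split($2, col2, "|");
      if (col1[1] != col2[1]) { print col1[1] "\t" col2[1] } else { print col1[1] } }'
# expected output:
# P05067	Q12345
# P05067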

withName: 'GROUP_DIAMOND' {
ext.args2 = "\'{if (NF == 1) a[\$1]=\$1; else for (i=2; i<=NF; i++) a[\$1]=a[\$1] \"\\t\" \$i} END {for (key in a) print key a[key]}\'"
ext.prefix = { "${meta.id}_clusters" }
ext.suffix = "txt"
publishDir = [
path: { "${params.outdir}/merge" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
}
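
A sketch of the grouping step on invented representative/member pairs: members accumulate per representative, and since for (key in a) gives no ordering guarantee, the output line order may vary.

printf 'P05067\tQ12345\nP05067\tO43521\nA0A024R1R8\tB4DDF8\n' \
  | awk '{if (NF == 1) a[$1]=$1; else for (i=2; i<=NF; i++) a[$1]=a[$1] "\t" $i} END {for (key in a) print key a[key]}'
# expected output (order may vary):
# P05067	Q12345	O43521
# A0A024R1R8	B4DDF8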

withName: 'REDUCE_IDMAP' {
ext.args = "-F\'\t\'"
ext.args2 = "\'NF >= 2\'"
ext.prefix = { "${meta.id}_idmap" }
}
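
The REDUCE_IDMAP filter keeps only ID-map rows that actually carry a mapping, i.e. at least two tab-separated fields; a tiny sketch with made-up rows:

printf 'P05067\tENSG00000142192\nQ99999\n' | awk -F'\t' 'NF >= 2'
# prints only: P05067	ENSG00000142192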

withName: 'MERGE_CSV' {
ext.args = '-f 1 --outer-join --na 0'
publishDir = [
Binary file added docs/images/nf-core-reportho_logo_hex_dark.png
Binary file added docs/images/nf-core-reportho_logo_hex_light.png
10 changes: 10 additions & 0 deletions modules.json
@@ -20,11 +20,21 @@
"git_sha": "614abbf126f287a3068dc86997b2e1b6a93abe20",
"installed_by": ["modules"]
},
"diamond/cluster": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
},
"fastme": {
"branch": "master",
"git_sha": "5f4e755fdc22c6e40d740ab27ea9b1004e806cb5",
"installed_by": ["modules"]
},
"gawk": {
"branch": "master",
"git_sha": "97321eded31a12598837a476d3615300af413bb7",
"installed_by": ["modules"]
},
"iqtree": {
"branch": "master",
"git_sha": "ba03053ffa300ccdd044545131ba033b73f327fe",
2 changes: 1 addition & 1 deletion modules/local/convert_phylip.nf
@@ -18,7 +18,7 @@ process CONVERT_PHYLIP {
task.ext.when == null || task.ext.when

script:
prefix = task.ext.prefix ?: meta.id
def prefix = task.ext.prefix ?: meta.id
"""
clustal2phylip.py $input_file ${prefix}.phy
3 changes: 1 addition & 2 deletions modules/local/create_tcoffeetemplate.nf
@@ -17,8 +17,7 @@ process CREATE_TCOFFEETEMPLATE {
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
def prefix = task.ext.prefix ?: "${meta.id}"
"""
# Prep templates
for structure in \$(ls *.pdb); do
2 changes: 1 addition & 1 deletion modules/local/fetch_afdb_structures.nf
@@ -21,7 +21,7 @@ process FETCH_AFDB_STRUCTURES {
task.ext.when == null || task.ext.when

script:
prefix = task.ext.prefix ?: meta.id
def prefix = task.ext.prefix ?: meta.id
"""
fetch_afdb_structures.py $ids $prefix 2> ${prefix}_af_versions.txt
2 changes: 1 addition & 1 deletion modules/local/fetch_eggnog_group_local.nf
@@ -23,7 +23,7 @@ process FETCH_EGGNOG_GROUP_LOCAL {
task.ext.when == null || task.ext.when

script:
prefix = task.ext.prefix ?: meta.id
def prefix = task.ext.prefix ?: meta.id
"""
# get the EggNOG ID from the ID map
zcat $eggnog_idmap | grep \$(cat $uniprot_id) | cut -f2 | cut -d',' -f1 > eggnog_id.txt || test -f eggnog_id.txt
2 changes: 1 addition & 1 deletion modules/local/fetch_inspector_group_online.nf
@@ -19,7 +19,7 @@ process FETCH_INSPECTOR_GROUP_ONLINE {
task.ext.when == null || task.ext.when

script:
prefix = task.ext.prefix ?: meta.id
def prefix = task.ext.prefix ?: meta.id
"""
# get the Uniprot ID
uniprot_id=\$(cat $uniprot_id)
2 changes: 1 addition & 1 deletion modules/local/fetch_oma_group_local.nf
@@ -22,7 +22,7 @@ process FETCH_OMA_GROUP_LOCAL {
task.ext.when == null || task.ext.when

script:
prefix = task.ext.prefix ?: meta.id
def prefix = task.ext.prefix ?: meta.id
"""
# Obtain the OMA ID for the given Uniprot ID of the query protein
uniprot2oma_local.py $uniprot_idmap $uniprot_id > oma_id.txt || test -f oma_id.txt
2 changes: 1 addition & 1 deletion modules/local/fetch_oma_group_online.nf
@@ -18,7 +18,7 @@ process FETCH_OMA_GROUP_ONLINE {
task.ext.when == null || task.ext.when

script:
prefix = task.ext.prefix ?: meta.id
def prefix = task.ext.prefix ?: meta.id
"""
# get uniprot ID
uniprot_id=\$(cat ${uniprot_id})
2 changes: 1 addition & 1 deletion modules/local/fetch_panther_group_local.nf
@@ -19,7 +19,7 @@ process FETCH_PANTHER_GROUP_LOCAL {
task.ext.when == null || task.ext.when

script:
prefix = task.ext.prefix ?: meta.id
def prefix = task.ext.prefix ?: meta.id
"""
id=\$(cat ${uniprot_id})
touch ${prefix}_panther_group_raw.txt
2 changes: 1 addition & 1 deletion modules/local/fetch_panther_group_online.nf
@@ -18,7 +18,7 @@ process FETCH_PANTHER_GROUP_ONLINE {
task.ext.when == null || task.ext.when

script:
prefix = task.ext.prefix ?: meta.id
def prefix = task.ext.prefix ?: meta.id
"""
# get Uniprot ID and TaxID
uniprot_id=\$(cat $uniprot_id)
2 changes: 1 addition & 1 deletion modules/local/filter_fasta.nf
@@ -18,7 +18,7 @@ process FILTER_FASTA {
task.ext.when == null || task.ext.when

script:
prefix = task.ext.prefix ?: meta.id
def prefix = task.ext.prefix ?: meta.id
"""
filter_fasta.py ${fasta} ${structures} ${prefix}_filtered.fa
2 changes: 1 addition & 1 deletion modules/local/filter_hits.nf
@@ -21,7 +21,7 @@ process FILTER_HITS {
task.ext.when == null || task.ext.when

script:
prefix = task.ext.prefix ?: meta.id
def prefix = task.ext.prefix ?: meta.id
targetfile = use_centroid ? "${prefix}_centroid.txt" : "${prefix}_minscore_${min_score}.txt"
"""
score_hits.py $score_table $prefix $queryid
2 changes: 1 addition & 1 deletion modules/local/identify_seq_online.nf
@@ -18,7 +18,7 @@ process IDENTIFY_SEQ_ONLINE {
task.ext.when == null || task.ext.when

script:
prefix = task.ext.prefix ?: meta.id
def prefix = task.ext.prefix ?: meta.id
"""
fetch_oma_by_sequence.py $fasta id_raw.txt ${prefix}_taxid.txt ${prefix}_exact.txt
uniprotize_oma_online.py id_raw.txt > ${prefix}_id.txt
2 changes: 1 addition & 1 deletion modules/local/make_hits_table.nf
@@ -18,7 +18,7 @@ process MAKE_HITS_TABLE {
task.ext.when == null || task.ext.when

script:
prefix = task.ext.prefix ?: meta.id
def prefix = task.ext.prefix ?: meta.id
"""
make_hits_table.py $merged_csv ${meta.id} > ${prefix}_hits_table.csv
12 changes: 6 additions & 6 deletions modules/local/make_report.nf
@@ -2,11 +2,6 @@ process MAKE_REPORT {
tag "$meta.id"
label 'process_single'

// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
error("Local MAKE_REPORT module does not support Conda. Please use Docker / Singularity / Podman instead.")
}

container "nf-core/reportho-orthologs-report:1.0.0"

input:
@@ -20,7 +15,12 @@
task.ext.when == null || task.ext.when

script:
prefix = task.ext.prefix ?: meta.id
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
error("Local MAKE_REPORT module does not support Conda. Please use Docker / Singularity / Podman instead.")
}

def prefix = task.ext.prefix ?: meta.id
seqhits_cmd = seq_hits ? "cp $seq_hits public/seq_hits.txt" : ''
seqmisses_cmd = seq_misses ? "cp $seq_misses public/seq_misses.txt" : ''
strhits_cmd = str_hits ? "cp $str_hits public/str_hits.txt" : ''
2 changes: 1 addition & 1 deletion modules/local/make_score_table.nf
@@ -18,7 +18,7 @@ process MAKE_SCORE_TABLE {
task.ext.when == null || task.ext.when

script:
prefix = task.ext.prefix ?: meta.id
def prefix = task.ext.prefix ?: meta.id
"""
make_score_table.py $merged_csv > ${prefix}_score_table.csv
2 changes: 1 addition & 1 deletion modules/local/make_stats.nf
@@ -18,7 +18,7 @@ process MAKE_STATS {
task.ext.when == null || task.ext.when

script:
prefix = task.ext.prefix ?: meta.id
def prefix = task.ext.prefix ?: meta.id
"""
make_stats.py ${score_table} > ${prefix}_stats.yml
2 changes: 1 addition & 1 deletion modules/local/plot_orthologs.nf
@@ -20,7 +20,7 @@ process PLOT_ORTHOLOGS {
task.ext.when == null || task.ext.when

script:
prefix = task.ext.prefix ?: meta.id
def prefix = task.ext.prefix ?: meta.id
"""
plot_orthologs.R $score_table $prefix
2 changes: 1 addition & 1 deletion modules/local/split_id_format.nf
@@ -18,7 +18,7 @@ process SPLIT_ID_FORMAT {
task.ext.when == null || task.ext.when

script:
prefix = task.ext.prefix ?: meta.id
def prefix = task.ext.prefix ?: meta.id
"""
cut -d ',' -f 1 $ids | tail -n +2 > tmp
split_id_format.py tmp $prefix
48 changes: 48 additions & 0 deletions modules/local/split_taxids.nf
@@ -0,0 +1,48 @@
process SPLIT_TAXIDS {
tag "$input_file"
label 'process_single'

conda "conda-forge::python=3.12.0 conda-forge::biopython=1.84.0 conda-forge::requests=2.32.3"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mulled-v2-bc54124b36864a4af42a9db48b90a404b5869e7e:5258b8e5ba20587b7cbf3e942e973af5045a1e59-0' :
'community.wave.seqera.io/library/python_requests_biopython:3c0f15f68130f062' }"

input:
tuple val(meta), path(input_file)

output:
tuple val(meta), path("*.fa"), emit: fastas
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def prefix = task.ext.prefix ?: meta.id
"""
awk -v RS=">" 'NR > 1 {
split(\$1, header, "|")
id = header[2]
out_filename = "${prefix}_" id ".fa"
print ">" \$0 >> out_filename
close(out_filename)
}' $input_file
cat <<- END_VERSIONS > versions.yml
"${task.process}":
awk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
END_VERSIONS
"""

stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}_0.fa
cat <<- END_VERSIONS > versions.yml
"${task.process}":
awk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
END_VERSIONS
"""
}
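
For orientation, a minimal sketch of the awk block above run outside the pipeline, with invented headers of the assumed form >ACCESSION|TAXID and a stand-in output prefix (query_): RS=">" makes each FASTA record one awk record, and each record is routed to a file named after the second |-delimited header field.

# build a small merged FASTA with two invented records
printf '>P05067|9606\nMLPGLA\n>Q9VHA0|7227\nMSDNEQ\n' > merged.fa
awk -v RS=">" 'NR > 1 {
  split($1, header, "|")
  id = header[2]
  out_filename = "query_" id ".fa"
  print ">" $0 >> out_filename
  close(out_filename)
}' merged.fa
# creates query_9606.fa (P05067 record) and query_7227.fa (Q9VHA0 record)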