Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev 0.7.5 #281

Merged
merged 4 commits into from
Sep 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 24 additions & 9 deletions workflow/rules/antismash.smk
Original file line number Diff line number Diff line change
Expand Up @@ -69,20 +69,35 @@ elif antismash_major_version >= 7:
genefinding="none",
shell:
"""
# Find the latest existing json output for this strain
latest_version=$(ls -d data/interim/antismash/*/*/*.json | grep {wildcards.strains} | sort -r | head -n 1 | cut -d '/' -f 4)
set +e

# Find the latest existing JSON output for this strain
latest_version=$(ls -d data/interim/antismash/*/*/*.json | grep {wildcards.strains} | sort -r | head -n 1 | cut -d '/' -f 4) 2>> {log}

if [ -n "$latest_version" ]; then
OLD_JSON="data/interim/antismash/$latest_version/{wildcards.strains}/{wildcards.strains}.json"
echo "Using existing json from $OLD_JSON as starting point..." >> {log}
ANTISMASH_INPUT="--reuse-result $OLD_JSON"
# Use existing JSON result as starting point
old_json="data/interim/antismash/$latest_version/{wildcards.strains}/{wildcards.strains}.json"
echo "Using existing JSON from $old_json as starting point..." >> {log}
antismash_input="--reuse-result $old_json"
else
echo "No existing output directories found, starting AntiSMASH from scratch..." >> {log}
ANTISMASH_INPUT="{input.gbk}"
# No existing JSON result found, use genbank input
echo "No existing JSON result found, starting AntiSMASH from scratch..." >> {log}
antismash_input="{input.gbk}"
fi
# run antismash

# Run AntiSMASH
antismash --genefinding-tool {params.genefinding} --output-dir {params.folder} \
--database {input.resources} \
--cb-general --cb-subclusters --cb-knownclusters -c {threads} $ANTISMASH_INPUT --logfile {log} 2>> {log}
--cb-general --cb-subclusters --cb-knownclusters -c {threads} $antismash_input --logfile {log} 2>> {log}

# Check if the run failed due to changed detection results
if grep -q "ValueError: Detection results have changed. No results can be reused" {log}; then
# Use genbank input instead
echo "Previous JSON result is invalid, starting AntiSMASH from scratch..." >> {log}
antismash --genefinding-tool {params.genefinding} --output-dir {params.folder} \
--database {input.resources} \
--cb-general --cb-subclusters --cb-knownclusters -c {threads} {input.gbk} --logfile {log} 2>> {log}
fi
"""

rule copy_antismash:
Expand Down
2 changes: 1 addition & 1 deletion workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ from pathlib import Path
import peppy

min_version("7.14.0")
__version__ = "0.7.4"
__version__ = "0.7.5"


container: "docker://matinnu/bgcflow:latest"
Expand Down
14 changes: 7 additions & 7 deletions workflow/rules/deeptfactor.smk
Original file line number Diff line number Diff line change
Expand Up @@ -14,30 +14,30 @@ rule deeptfactor_setup:

# Rule deeptfactor: runs tf_running.py from the DeepTFactor resource directory
# on one strain's .faa file and writes into data/interim/deeptfactor/{strains}/.
# NOTE(review): this block is a rendered diff without +/- markers. Where two
# adjacent lines set the same thing, the first looks like the removed version
# and the second its replacement -- confirm against the raw diff.
rule deeptfactor:
input:
# Per-strain .faa file under the prokka interim directory (old key name: fasta, new key name: faa).
fasta="data/interim/prokka/{strains}/{strains}.faa",
faa="data/interim/prokka/{strains}/{strains}.faa",
# Directory the shell command cd's into before calling tf_running.py.
resource="resources/deeptfactor/",
output:
# One variant declares the whole per-strain directory as output, the other
# declares the prediction_result.txt file inside that directory.
deeptfactor_dir=directory("data/interim/deeptfactor/{strains}/"),
deeptfactor="data/interim/deeptfactor/{strains}/prediction_result.txt",
conda:
"../envs/deeptfactor.yaml"
threads: 2
params:
# The "../../" variants are written relative to resources/deeptfactor/ (two
# levels below the working directory), because the command runs after the cd.
# The plain "data/..." outdir is instead prefixed with $workdir in the shell command.
faa="../../data/interim/prokka/{strains}/{strains}.faa",
outdir="../../data/interim/deeptfactor/{strains}/",
outdir="data/interim/deeptfactor/{strains}/",
log:
"logs/deeptfactor/deeptfactor/deeptfactor-{strains}.log",
# Shell steps: remember the starting directory in $workdir, create the
# per-strain output directory, then run tf_running.py in a subshell from
# {input.resource} with "-g cpu" and "-cpu {threads}". stderr of the mkdir and
# of the subshell is appended to {log}. The two "-i ... -o ..." lines are the
# old (params-relative) and new ($workdir-prefixed) forms of the same arguments.
shell:
"""
workdir=$PWD
mkdir -p data/interim/deeptfactor/{wildcards.strains} 2>> {log}
(cd {input.resource} && python tf_running.py \
-i {params.faa} -o {params.outdir} \
-i $workdir/{input.faa} -o $workdir/{params.outdir} \
-g cpu -cpu {threads}) 2>> {log}
"""


rule deeptfactor_to_json:
input:
deeptfactor_dir="data/interim/deeptfactor/{strains}/",
deeptfactor="data/interim/deeptfactor/{strains}/prediction_result.txt",
output:
deeptfactor_json="data/interim/deeptfactor/{strains}_deeptfactor.json",
conda:
Expand All @@ -46,7 +46,7 @@ rule deeptfactor_to_json:
"logs/deeptfactor/deeptfactor/deeptfactor-{strains}_to_json.log",
shell:
"""
python workflow/bgcflow/bgcflow/data/deeptfactor_scatter.py {input.deeptfactor_dir}/prediction_result.txt {output.deeptfactor_json} 2>> {log}
python workflow/bgcflow/bgcflow/data/deeptfactor_scatter.py {input.deeptfactor} {output.deeptfactor_json} 2>> {log}
"""


Expand Down
4 changes: 2 additions & 2 deletions workflow/rules/roary.smk
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ rule roary:
"../envs/roary.yaml"
params:
i=80,
g=60000,
threads: 8
g=80000,
threads: 16
log:
"logs/roary/roary-{name}.log",
shell:
Expand Down