diff --git a/README.md b/README.md
index 48be39f..b781ed2 100755
--- a/README.md
+++ b/README.md
@@ -21,6 +21,7 @@ Annotate genes in your bacterial genomes with [Prokka](https://github.com/tseema
7. [ Limitations ](#limitations)
8. [ Publication ](#publication)
9. [ References ](#references)
+10. [ FAQ ](#faq)
@@ -255,3 +256,13 @@ In particular, RIBAP takes advantage of and uses the following tools:
> Conway, Jake R., Alexander Lex, and Nils Gehlenborg. "UpSetR: an R package for the visualization of intersecting sets and their properties." Bioinformatics (2017).
[Code](https://github.com/hms-dbmi/UpSetR) | [Publication](https://doi.org/10.1093/bioinformatics/btx364)
+
+
+
+
+# FAQ
+
+1) Since Nextflow 23.07.0-edge, Nextflow no longer mounts the host’s home directory when using Apptainer or Singularity. This causes issues in some dependencies. As a workaround, you can revert to the old behavior by setting the environment variable `NXF_APPTAINER_HOME_MOUNT` or `NXF_SINGULARITY_HOME_MOUNT` to `true` on the machine from which you launch the pipeline. From: [https://www.nextflow.io/docs/edge/container.html](https://www.nextflow.io/docs/edge/container.html).
+
+2) RIBAP might fail due to `RecursionError: maximum recursion depth exceeded in comparison`. This can happen in the current combining step where the initial Roary clusters are combined with the ILP results. This is done in a Python script where the default recursion depth is 1000. If this happens, you can increase the recursion depth via the parameter `--set_recursion_limit`. However, be careful not to increase this too much (we tested up to 5000). See also [https://github.com/hoelzer-lab/ribap/issues/66](https://github.com/hoelzer-lab/ribap/issues/66).
+
diff --git a/bin/create_msa_tree.py b/bin/create_msa_tree.py
index 96041cf..210005b 100755
--- a/bin/create_msa_tree.py
+++ b/bin/create_msa_tree.py
@@ -18,6 +18,7 @@
dirPath = sys.argv[1]
ribapTable = sys.argv[2]
+wasRenamed = sys.argv[3]
msaPath = f"{dirPath}/msa/"
@@ -29,7 +30,10 @@
NUMSTRAINS += 1
basename = os.path.basename(file)
for record in SeqIO.parse(file, 'fasta'):
- basename = basename.replace("_RENAMED.faa", '')
+ if len(wasRenamed) > 1:
+ basename = basename.replace(".faa", '')
+ else:
+ basename = basename.replace("_RENAMED.faa", '')
geneID = record.id.split('_')[1]
geneID = f"{basename}_{geneID}"
id2strain[record.id] = geneID
diff --git a/bin/generate_html.py b/bin/generate_html.py
index 89e68e7..687c4a9 100755
--- a/bin/generate_html.py
+++ b/bin/generate_html.py
@@ -246,7 +246,7 @@
continue
if i == 0:
# HTMLBODY += f'
{strain} | {individualAnno.split(" // ")[0]} | {individualAnno.split(" // ")[1]} | \n | | | | | \n{tree} Multiple Sequence Alignment Newick Tree |
\n'
- HTMLBODY += f'{strain} | {individualAnno.split(" // ")[0]} | {individualAnno.split(" // ")[1]} | \n | | | | | \n{tree} Multiple Sequence Alignment Newick Tree |
\n'
+ HTMLBODY += f'{strain} | {individualAnno.split(" // ")[0]} | {individualAnno.split(" // ")[1]} | \n | | | | | \n{tree} Multiple Sequence Alignment Newick Tree |
\n'
i += 1
else:
HTMLBODY += f'{strain} | {individualAnno.split(" // ")[0]} | {individualAnno.split(" // ")[1]} | | | | | |
\n'
diff --git a/modules/combine_roary_ilp.nf b/modules/combine_roary_ilp.nf
index b2912f9..bd836d4 100644
--- a/modules/combine_roary_ilp.nf
+++ b/modules/combine_roary_ilp.nf
@@ -6,13 +6,14 @@ process combine_roary_ilp {
publishDir "${params.output}/05-combine", mode: 'copy', pattern: "*.txt"
input:
- tuple val(ident), file(roary), file(strain_ids), file(prokka_gff)
- file(solved_ilps)
+ tuple val(ident), path(roary), path(strain_ids), path(prokka_gff)
+ path(solved_ilps)
+ path(script)
output:
- tuple val(ident), file("holy*.csv")
- tuple val(ident), file("ribap*.csv")
- tuple val(ident), file("*.txt")
+ tuple val(ident), path("holy*.csv")
+ tuple val(ident), path("ribap*.csv")
+ tuple val(ident), path("*.txt")
script:
"""
@@ -22,7 +23,8 @@ process combine_roary_ilp {
mkdir prokka
cp *.gff prokka/
- combine_roary_ilp.py ${strain_ids} ${ident}/gene_presence_absence.csv solved/ holy_python_ribap_"${ident}".csv ${ident} > ribap_roary"${ident}"_summary.txt
+ # setrecursionlimit see: https://github.com/hoelzer-lab/ribap/issues/66
+ python -c "import sys;sys.setrecursionlimit(${params.set_recursion_limit});exec(open('combine_roary_ilp.py').read())" ${strain_ids} ${ident}/gene_presence_absence.csv solved/ holy_python_ribap_"${ident}".csv ${ident} > ribap_roary"${ident}"_summary.txt
"""
}
diff --git a/modules/generate_html.nf b/modules/generate_html.nf
index 192ec0b..0c4a262 100644
--- a/modules/generate_html.nf
+++ b/modules/generate_html.nf
@@ -9,14 +9,13 @@ process generate_html {
file(roary)
file(individual_annotations)
file(tree_svg)
+ file(web_dir)
output:
file("web")
script:
"""
- cp "$baseDir/data/web.tar.gz" .
- #wget https://www.rna.uni-jena.de/supplements/ribap/web.tar.gz
tar zxvf web.tar.gz
mkdir tree
diff --git a/modules/generate_upsetr_input.nf b/modules/generate_upsetr_input.nf
index 2980503..2f32394 100644
--- a/modules/generate_upsetr_input.nf
+++ b/modules/generate_upsetr_input.nf
@@ -38,7 +38,10 @@ with open(input_file) as holytable:
strain = line.split('\\t')
my_list = strain[3:]
for i in my_list:
- my_dict[i + '_RENAMED'] = []
+ if len('${params.annotation_file}') > 1:
+ my_dict[i] = []
+ else:
+ my_dict[i + '_RENAMED'] = []
else:
ids = line.split('\\t')
for x in line.split('\\t')[3:]:
diff --git a/modules/prepare_msa.nf b/modules/prepare_msa.nf
index 769a1c7..c7f7843 100644
--- a/modules/prepare_msa.nf
+++ b/modules/prepare_msa.nf
@@ -18,7 +18,7 @@ process prepare_msa {
cp *.faa faa/
mkdir msa
- create_msa_tree.py . ${holy_ribap_csv}
+ create_msa_tree.py . ${holy_ribap_csv} '${params.annotation_file}'
#mv msa/*.faa .
"""
}
diff --git a/nextflow.config b/nextflow.config
index 77ece88..1838441 100755
--- a/nextflow.config
+++ b/nextflow.config
@@ -28,6 +28,7 @@ params {
chunks = 8 // how many ILP chunks for parallel computing
protein_fasta_file = ''
annotation_file = ''
+ set_recursion_limit = 1000
// folder structure
output = 'results'
@@ -119,6 +120,7 @@ profiles {
singularity {
enabled = true
autoMounts = true
+ envWhitelist = "HTTPS_PROXY,HTTP_PROXY,http_proxy,https_proxy,FTP_PROXY,ftp_proxy"
cacheDir = params.singularityCacheDir
}
includeConfig 'configs/container.config'
diff --git a/ribap.nf b/ribap.nf
index 4b815e9..c198cca 100755
--- a/ribap.nf
+++ b/ribap.nf
@@ -211,9 +211,9 @@ workflow RIBAP {
.join(identity_ch
.combine(gff_ch).groupTuple())
- combine_roary_ilp(combine_ch, ilp_refinement.out[0].flatten().toList())
-
-
+ // The script is passed into the process as an input file so that Python can be started with a raised recursion limit; see https://github.com/hoelzer-lab/ribap/issues/66
+ combine_roary_ilp_script = Channel.fromPath( workflow.projectDir + '/bin/combine_roary_ilp.py', checkIfExists: true )
+ combine_roary_ilp(combine_ch, ilp_refinement.out[0].flatten().toList(), combine_roary_ilp_script)
// // select only the 95 combined output file
// identity_ch = Channel.from(95)
@@ -230,7 +230,10 @@ workflow RIBAP {
//combine_msa(mafft.out.collect(), strain_ids.out)
build_html_ch = identity_ch.join(combine_roary_ilp.out[0])
- generate_html(build_html_ch, roary.out.collect(), combine_roary_ilp.out[1].collect(), nw_display.out.collect())
+ // pass web.tar.gz into the process as an input file because, since Nextflow v23, the home directory is no longer mounted into (Singularity) containers by default
+ // see: https://github.com/hoelzer-lab/ribap/issues/67
+ web_dir = Channel.fromPath( workflow.projectDir + '/data/web.tar.gz', checkIfExists: true )
+ generate_html(build_html_ch, roary.out.collect(), combine_roary_ilp.out[1].collect(), nw_display.out.collect(), web_dir)
generate_upsetr_input(identity_ch.join(combine_roary_ilp.out[0]), strain_ids.out)
upsetr(generate_upsetr_input.out[1])
@@ -300,6 +303,9 @@ def helpMSG() {
Note that this flag requires the usage of the --annotation_file flag. This will skip the Prokka annotation of the
workflow and uses your own annotation instead. If --list is set this
expects a CSV file of type 'samplename, path_to_protein_fasta_file'. [default: $params.protein_fasta_file]
+ --set_recursion_limit In case of a "RecursionError: maximum recursion depth exceeded in comparison" error, you can try to increase the
+ recursion limit of Python when combining the ILP and roary results. ATTENTION: only do this when you can closely
+ monitor the resources on your system and you know what you are doing! [default: $params.set_recursion_limit]
${c_yellow}UpSet plot:${c_reset}
--sets FASTA simpleNames for genomes that should be