diff --git a/README.md b/README.md index 48be39f..b781ed2 100755 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ Annotate genes in your bacterial genomes with [Prokka](https://github.com/tseema 7. [ Limitations ](#limitations) 8. [ Publication ](#publication) 9. [ References ](#references) +10. [ FAQ ](#faq) @@ -255,3 +256,13 @@ In particular, RIBAP takes advantage of and uses the following tools: > Conway, Jake R., Alexander Lex, and Nils Gehlenborg. "UpSetR: an R package for the visualization of intersecting sets and their properties." Bioinformatics (2017). [Code](https://github.com/hms-dbmi/UpSetR) | [Publication](https://doi.org/10.1093/bioinformatics/btx364) + + + + +# FAQ + +1) Since Nextflow 23.07.0-edge, Nextflow no longer mounts the host’s home directory when using Apptainer or Singularity. This causes issues in some dependencies. As a workaround, you can revert to the old behavior by setting the environment variable `NXF_APPTAINER_HOME_MOUNT` or `NXF_SINGULARITY_HOME_MOUNT` to `true` in the machine from which you launch the pipeline. From: [https://www.nextflow.io/docs/edge/container.html](https://www.nextflow.io/docs/edge/container.html). + +2) RIBAP might fail due to `RecursionError: maximum recursion depth exceeded in comparison`. This can happen in the current combining step where Roary initial clusters are combined with the ILP results. This is done in a Python script where the default recursion depth is 1000. If this happens, you can increase the recursion depth via the parameter `--set_recursion_limit`. However, be careful not to increase this too much (we tested up to 5000). 
See also [https://github.com/hoelzer-lab/ribap/issues/66](https://github.com/hoelzer-lab/ribap/issues/66) + diff --git a/bin/create_msa_tree.py b/bin/create_msa_tree.py index 96041cf..210005b 100755 --- a/bin/create_msa_tree.py +++ b/bin/create_msa_tree.py @@ -18,6 +18,7 @@ dirPath = sys.argv[1] ribapTable = sys.argv[2] +wasRenamed = sys.argv[3] msaPath = f"{dirPath}/msa/" @@ -29,7 +30,10 @@ NUMSTRAINS += 1 basename = os.path.basename(file) for record in SeqIO.parse(file, 'fasta'): - basename = basename.replace("_RENAMED.faa", '') + if len(wasRenamed) > 1: + basename = basename.replace(".faa", '') + else: + basename = basename.replace("_RENAMED.faa", '') geneID = record.id.split('_')[1] geneID = f"{basename}_{geneID}" id2strain[record.id] = geneID diff --git a/bin/generate_html.py b/bin/generate_html.py index 89e68e7..687c4a9 100755 --- a/bin/generate_html.py +++ b/bin/generate_html.py @@ -246,7 +246,7 @@ continue if i == 0: # HTMLBODY += f'{strain}{individualAnno.split(" // ")[0]}{individualAnno.split(" // ")[1]}\n \n{tree}

Multiple Sequence Alignment
Newick Tree

\n' - HTMLBODY += f'{strain}{individualAnno.split(" // ")[0]}{individualAnno.split(" // ")[1]}\n \n{tree}

Multiple Sequence Alignment
Newick Tree

\n' + HTMLBODY += f'{strain}{individualAnno.split(" // ")[0]}{individualAnno.split(" // ")[1]}\n \n{tree}

Multiple Sequence Alignment
Newick Tree

\n' i += 1 else: HTMLBODY += f'{strain}{individualAnno.split(" // ")[0]}{individualAnno.split(" // ")[1]} \n' diff --git a/modules/combine_roary_ilp.nf b/modules/combine_roary_ilp.nf index b2912f9..bd836d4 100644 --- a/modules/combine_roary_ilp.nf +++ b/modules/combine_roary_ilp.nf @@ -6,13 +6,14 @@ process combine_roary_ilp { publishDir "${params.output}/05-combine", mode: 'copy', pattern: "*.txt" input: - tuple val(ident), file(roary), file(strain_ids), file(prokka_gff) - file(solved_ilps) + tuple val(ident), path(roary), path(strain_ids), path(prokka_gff) + path(solved_ilps) + path(script) output: - tuple val(ident), file("holy*.csv") - tuple val(ident), file("ribap*.csv") - tuple val(ident), file("*.txt") + tuple val(ident), path("holy*.csv") + tuple val(ident), path("ribap*.csv") + tuple val(ident), path("*.txt") script: """ @@ -22,7 +23,8 @@ process combine_roary_ilp { mkdir prokka cp *.gff prokka/ - combine_roary_ilp.py ${strain_ids} ${ident}/gene_presence_absence.csv solved/ holy_python_ribap_"${ident}".csv ${ident} > ribap_roary"${ident}"_summary.txt + # setrecursionlimit see: https://github.com/hoelzer-lab/ribap/issues/66 + python -c "import sys;sys.setrecursionlimit(${params.set_recursion_limit});exec(open('combine_roary_ilp.py').read())" ${strain_ids} ${ident}/gene_presence_absence.csv solved/ holy_python_ribap_"${ident}".csv ${ident} > ribap_roary"${ident}"_summary.txt """ } diff --git a/modules/generate_html.nf b/modules/generate_html.nf index 192ec0b..0c4a262 100644 --- a/modules/generate_html.nf +++ b/modules/generate_html.nf @@ -9,14 +9,13 @@ process generate_html { file(roary) file(individual_annotations) file(tree_svg) + file(web_dir) output: file("web") script: """ - cp "$baseDir/data/web.tar.gz" . 
- #wget https://www.rna.uni-jena.de/supplements/ribap/web.tar.gz tar zxvf web.tar.gz mkdir tree diff --git a/modules/generate_upsetr_input.nf b/modules/generate_upsetr_input.nf index 2980503..2f32394 100644 --- a/modules/generate_upsetr_input.nf +++ b/modules/generate_upsetr_input.nf @@ -38,7 +38,10 @@ with open(input_file) as holytable: strain = line.split('\\t') my_list = strain[3:] for i in my_list: - my_dict[i + '_RENAMED'] = [] + if len('${params.annotation_file}') > 1: + my_dict[i] = [] + else: + my_dict[i + '_RENAMED'] = [] else: ids = line.split('\\t') for x in line.split('\\t')[3:]: diff --git a/modules/prepare_msa.nf b/modules/prepare_msa.nf index 769a1c7..c7f7843 100644 --- a/modules/prepare_msa.nf +++ b/modules/prepare_msa.nf @@ -18,7 +18,7 @@ process prepare_msa { cp *.faa faa/ mkdir msa - create_msa_tree.py . ${holy_ribap_csv} + create_msa_tree.py . ${holy_ribap_csv} '${params.annotation_file}' #mv msa/*.faa . """ } diff --git a/nextflow.config b/nextflow.config index 77ece88..1838441 100755 --- a/nextflow.config +++ b/nextflow.config @@ -28,6 +28,7 @@ params { chunks = 8 // how many ILP chunks for parallel computing protein_fasta_file = '' annotation_file = '' + set_recursion_limit = 1000 // folder structure output = 'results' @@ -119,6 +120,7 @@ profiles { singularity { enabled = true autoMounts = true + envWhitelist = "HTTPS_PROXY,HTTP_PROXY,http_proxy,https_proxy,FTP_PROXY,ftp_proxy" cacheDir = params.singularityCacheDir } includeConfig 'configs/container.config' diff --git a/ribap.nf b/ribap.nf index 4b815e9..c198cca 100755 --- a/ribap.nf +++ b/ribap.nf @@ -211,9 +211,9 @@ workflow RIBAP { .join(identity_ch .combine(gff_ch).groupTuple()) - combine_roary_ilp(combine_ch, ilp_refinement.out[0].flatten().toList()) - - + // we copy the script in the process to execute python in a special way. 
We copy it in due to https://github.com/hoelzer-lab/ribap/issues/66 + combine_roary_ilp_script = Channel.fromPath( workflow.projectDir + '/bin/combine_roary_ilp.py', checkIfExists: true ) + combine_roary_ilp(combine_ch, ilp_refinement.out[0].flatten().toList(), combine_roary_ilp_script) // // select only the 95 combined output file // identity_ch = Channel.from(95) @@ -230,7 +230,10 @@ workflow RIBAP { //combine_msa(mafft.out.collect(), strain_ids.out) build_html_ch = identity_ch.join(combine_roary_ilp.out[0]) - generate_html(build_html_ch, roary.out.collect(), combine_roary_ilp.out[1].collect(), nw_display.out.collect()) + // get the web.tar.gz path bc since nf v23 mounting of the home dir in containers (singularity) is not possible per default + // see: https://github.com/hoelzer-lab/ribap/issues/67 + web_dir = Channel.fromPath( workflow.projectDir + '/data/web.tar.gz', checkIfExists: true ) + generate_html(build_html_ch, roary.out.collect(), combine_roary_ilp.out[1].collect(), nw_display.out.collect(), web_dir) generate_upsetr_input(identity_ch.join(combine_roary_ilp.out[0]), strain_ids.out) upsetr(generate_upsetr_input.out[1]) @@ -300,6 +303,9 @@ def helpMSG() { Note that this flag requires the usage of the --annotation_file flag. This will skip the Prokka annotation of the workflow and uses your own annotation instead. If --list is set this expects a CSV file of type 'samplename, path_to_protein_fasta_file'. [default: $params.protein_fasta_file] + --set_recursion_limit In case of a "RecursionError: maximum recursion depth exceeded in comparison" error, you can try to increase the + recursion limit of Python when combining the ILP and roary results. ATTENTION: only do this when you can closely + monitor the resources on your system and you know what you are doing! [default: $params.set_recursion_limit] ${c_yellow}UpSet plot:${c_reset} --sets FASTA simpleNames for genomes that should be