From db08838b9e40c6b30fae6fce0b464669f36b0af0 Mon Sep 17 00:00:00 2001 From: Olga Botvinnik Date: Sat, 28 May 2022 13:23:33 -0700 Subject: [PATCH] Uniquify csv output from multigather Hello! Hope you and yours are doing well. I am using multigather to query protein sequences against each other, and primarily use the csv file for downstream processing. As is, if the signatures in `--query query.sig` were created from one fasta file with e.g. `--singleton`, then each query's results overwrite the previous ones in the same csv file. This proposed change appends each query signature's md5sum to the output csv filename to ensure uniqueness. Other suggestions are welcome! (not tested yet) Warmest, Olga --- src/sourmash/commands.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/sourmash/commands.py b/src/sourmash/commands.py index c80b242cc1..4e589d1287 100644 --- a/src/sourmash/commands.py +++ b/src/sourmash/commands.py @@ -998,9 +998,10 @@ def multigather(args): query_filename = query.filename if not query_filename: # use md5sum if query.filename not properly set - query_filename = query.md5sum() - - output_base = os.path.basename(query_filename) + output_base = query.md5sum() + else: + # Uniquify the output file if all signatures were made from the same file (e.g. with --singleton) + output_base = os.path.basename(query_filename) + "." + query.md5sum() output_csv = output_base + '.csv' w = None