Skip to content

Commit

Permalink
add query info to gather CSV output (#1565)
Browse files (browse the repository at this point in the history)
  • Loading branch information
ctb authored Jun 2, 2021
1 parent e2dca11 commit bfe7c40
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 6 deletions.
6 changes: 4 additions & 2 deletions src/sourmash/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -731,7 +731,8 @@ def gather(args):
'f_unique_to_query', 'f_unique_weighted',
'average_abund', 'median_abund', 'std_abund', 'name',
'filename', 'md5', 'f_match_orig', 'unique_intersect_bp',
'gather_result_rank', 'remaining_bp']
'gather_result_rank', 'remaining_bp',
'query_filename', 'query_name', 'query_md5', 'query_bp']

with FileOutputCSV(args.output) as fp:
w = csv.DictWriter(fp, fieldnames=fieldnames)
Expand Down Expand Up @@ -894,7 +895,8 @@ def multigather(args):
'average_abund', 'median_abund', 'std_abund', 'name',
'filename', 'md5', 'f_match_orig',
'unique_intersect_bp', 'gather_result_rank',
'remaining_bp']
'remaining_bp', 'query_filename', 'query_name',
'query_md5', 'query_bp']
with FileOutputCSV(output_csv) as fp:
w = csv.DictWriter(fp, fieldnames=fieldnames)
w.writeheader()
Expand Down
13 changes: 11 additions & 2 deletions src/sourmash/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def search_databases_with_abund_query(query, databases, **kwargs):
###

GatherResult = namedtuple('GatherResult',
'intersect_bp, f_orig_query, f_match, f_unique_to_query, f_unique_weighted, average_abund, median_abund, std_abund, filename, name, md5, match, f_match_orig, unique_intersect_bp, gather_result_rank, remaining_bp')
'intersect_bp, f_orig_query, f_match, f_unique_to_query, f_unique_weighted, average_abund, median_abund, std_abund, filename, name, md5, match, f_match_orig, unique_intersect_bp, gather_result_rank, remaining_bp, query_filename, query_name, query_md5, query_bp')


def _find_best(counters, query, threshold_bp):
Expand Down Expand Up @@ -281,6 +281,10 @@ def gather_databases(query, counters, threshold_bp, ignore_abundance):
"""
# track original query information for later usage.
track_abundance = query.minhash.track_abundance and not ignore_abundance
orig_query_bp = len(query.minhash) * query.minhash.scaled
orig_query_filename = query.filename
orig_query_name = query.name
orig_query_md5 = query.md5sum()[:8]
orig_query_mh = query.minhash

# do we pay attention to abundances?
Expand Down Expand Up @@ -381,7 +385,12 @@ def gather_databases(query, counters, threshold_bp, ignore_abundance):
name=str(best_match),
match=best_match,
gather_result_rank=result_n,
remaining_bp=remaining_bp)
remaining_bp=remaining_bp,
query_bp = orig_query_bp,
query_filename=orig_query_filename,
query_name=orig_query_name,
query_md5=orig_query_md5,
)
result_n += 1

yield result, weighted_missed, new_query
Expand Down
10 changes: 8 additions & 2 deletions tests/test_sourmash.py
Original file line number Diff line number Diff line change
Expand Up @@ -2928,12 +2928,14 @@ def test_gather_csv(linear_gather, prefetch_gather):
testdata2 = utils.get_test_data('short2.fa')
status, out, err = utils.runscript('sourmash',
['compute', testdata1, testdata2,
'--scaled', '10'],
'--scaled', '10',
'--name-from-first'],
in_directory=location)

status, out, err = utils.runscript('sourmash',
['compute', testdata2,
'--scaled', '10',
'--name-from-first',
'-o', 'query.fa.sig'],
in_directory=location)

Expand Down Expand Up @@ -2968,9 +2970,13 @@ def test_gather_csv(linear_gather, prefetch_gather):
assert float(row['f_unique_to_query']) == 1.0
assert float(row['f_match']) == 1.0
assert row['filename'] == 'zzz'
assert row['name'].endswith('short2.fa')
assert row['name'] == 'tr1 4'
assert row['md5'] == 'c9d5a795eeaaf58e286fb299133e1938'
assert row['gather_result_rank'] == '0'
assert row['query_filename'].endswith('short2.fa')
assert row['query_name'] == 'tr1 4'
assert row['query_md5'] == 'c9d5a795'
assert row['query_bp'] == '910'


def test_gather_abund_x_abund(runtmp, prefetch_gather, linear_gather):
Expand Down

0 comments on commit bfe7c40

Please sign in to comment.