Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MRG] better handle some pickfile errors #1924

Merged
merged 3 commits into from
Apr 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/sourmash/picklist.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"Picklist code for extracting subsets of signatures."
import csv
import os
from enum import Enum

# set up preprocessing functions for column stuff
Expand Down Expand Up @@ -143,18 +144,23 @@ def load(self, pickfile, column_name):
"load pickset, return num empty vals, and set of duplicate vals."
pickset = self.init()

if not os.path.exists(pickfile) or not os.path.isfile(pickfile):
raise ValueError(f"pickfile '{pickfile}' must exist and be a regular file")

n_empty_val = 0
dup_vals = set()
with open(pickfile, newline='') as csvfile:
x = csvfile.readline()

# skip leading comment line in case there's a manifest header
if x[0] == '#':
if not x or x[0] == '#':
pass
else:
csvfile.seek(0)

r = csv.DictReader(csvfile)
if not r.fieldnames:
raise ValueError(f"empty or improperly formatted pickfile '{pickfile}'")

if column_name not in r.fieldnames:
raise ValueError(f"column '{column_name}' not in pickfile '{pickfile}'")
Expand Down
8 changes: 4 additions & 4 deletions src/sourmash/sourmash_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,15 +126,15 @@ def load_picklist(args):
if args.picklist:
try:
picklist = SignaturePicklist.from_picklist_args(args.picklist)

notify(f"picking column '{picklist.column_name}' of type '{picklist.coltype}' from '{picklist.pickfile}'")

n_empty_val, dup_vals = picklist.load(picklist.pickfile, picklist.column_name)
except ValueError as exc:
error("ERROR: could not load picklist.")
error(str(exc))
sys.exit(-1)

notify(f"picking column '{picklist.column_name}' of type '{picklist.coltype}' from '{picklist.pickfile}'")

n_empty_val, dup_vals = picklist.load(picklist.pickfile, picklist.column_name)

notify(f"loaded {len(picklist.pickset)} distinct values into picklist.")
if n_empty_val:
notify(f"WARNING: {n_empty_val} empty values in column '{picklist.column_name}' in picklist file")
Expand Down
39 changes: 39 additions & 0 deletions tests/test_cmd_signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -1627,6 +1627,45 @@ def test_sig_extract_7_no_ksize(c):
assert len(siglist) == 3


def test_sig_extract_8_empty_picklist_fail(runtmp):
    """'sig extract' must fail with a clear message when the picklist file is empty."""
    signatures = [utils.get_test_data(name)
                  for name in ('47.fa.sig', '63.fa.sig')]

    # create a zero-length picklist file: open for writing, write nothing
    empty_csv = runtmp.output('pick.csv')
    with open(empty_csv, 'w', newline=''):
        pass

    # the command is expected to fail rather than run with an empty picklist
    with pytest.raises(SourmashCommandFailed):
        runtmp.sourmash('sig', 'extract', *signatures,
                        '--picklist', f"{empty_csv}:md5full:md5")

    err_output = runtmp.last_result.err
    print(err_output)

    assert "empty or improperly formatted pickfile" in err_output


def test_sig_extract_8_nofile_picklist_fail(runtmp):
    """'sig extract' must fail with a clear message when the picklist file is missing."""
    signatures = [utils.get_test_data(name)
                  for name in ('47.fa.sig', '63.fa.sig')]

    # only build the path; nothing in this test ever creates the file
    missing_csv = runtmp.output('pick.csv')

    # the command is expected to fail because the picklist cannot be loaded
    with pytest.raises(SourmashCommandFailed):
        runtmp.sourmash('sig', 'extract', *signatures,
                        '--picklist', f"{missing_csv}:md5full:md5")

    err_output = runtmp.last_result.err
    print(err_output)

    assert "must exist and be a regular file" in err_output


def test_sig_extract_8_picklist_md5(runtmp):
# extract 47 from 47, using a picklist w/full md5
sig47 = utils.get_test_data('47.fa.sig')
Expand Down