Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MRG] add sig cat --from-file #1657

Merged
merged 4 commits on Jul 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/sourmash/cli/sig/cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@

def subparser(subparsers):
subparser = subparsers.add_parser('cat')
subparser.add_argument('signatures', nargs='+')
subparser.add_argument('signatures', nargs='*')
subparser.add_argument(
'--from-file',
help='a text file containing a list of files to load signatures from'
)
subparser.add_argument(
'-q', '--quiet', action='store_true',
help='suppress non-error output'
Expand Down
5 changes: 5 additions & 0 deletions src/sourmash/sig/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ def cat(args):
save_sigs = sourmash_args.SaveSignaturesToLocation(args.output)
save_sigs.open()

if args.from_file:
more_files = sourmash_args.load_pathlist_from_file(args.from_file)
args.signatures = list(args.signatures)
args.signatures.extend(more_files)

for sigfile in args.signatures:
try:
loader = sourmash_args.load_file_as_signatures(sigfile,
Expand Down
5 changes: 3 additions & 2 deletions src/sourmash/sourmash_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,7 @@ def short_notify(self, msg_template, *args, **kwargs):
notify(msg, end=end)

def notify(self, location):
self.short_notify(f"...reading from file '{location}'", end='\r')
self.short_notify(f"...{self.n_sig} sigs so far. Now reading from file '{location}'", end='\r')

def start_file(self, location, loader):
n_this = 0
Expand All @@ -581,7 +581,8 @@ def start_file(self, location, loader):
finally:
self.n_sig += n_this

self.short_notify(f"loaded {n_this} sigs from '{location}'")
self.short_notify(f"Loaded {n_this} sigs from '{location}'",
end='\r')


#
Expand Down
62 changes: 38 additions & 24 deletions tests/test_cmd_signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -763,31 +763,13 @@ def test_sig_cat_2_out_inplace(c):


@utils.in_tempdir
def test_sig_cat_filelist(c):
def test_sig_cat_3_filelist(c):
# cat using a file list as input
sig47 = utils.get_test_data('47.fa.sig')
# sig47list = list(load_signatures(sig47))
# print("sig47: ",sig47)
# print(type(sig47))
# print("length sig47: ",len(sig47list))
# print("\n")

sig47abund = utils.get_test_data('track_abund/47.fa.sig')
# sig47abundlist = list(load_signatures(sig47abund))
# print("sig47abund: ",sig47abund)
# print(type(sig47abund))
# print("length sig47abund: ",len(sig47abundlist))
# print("\n")

multisig = utils.get_test_data('47+63-multisig.sig')
# multisiglist = list(load_signatures(multisig))
# print("multisig: ",multisig)
# print(type(multisig))
# print("length multisig: ",len(multisiglist))
# print("\n")

filelist = c.output("filelist")

with open(filelist, 'w') as f:
f.write("\n".join((sig47, sig47abund, multisig)))

Expand All @@ -814,16 +796,48 @@ def test_sig_cat_filelist(c):
# sort the signatures by something deterministic and unique
siglist.sort(key = lambda x: x.md5sum())

# print(len(siglist))
assert repr(siglist) == """[SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_011665.1 Shewanella baltica OS223 plasmid pS22303, complete sequence', 485c3377), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 57e2b22f), SourmashSignature('NC_011668.1 Shewanella baltica OS223 plasmid pS22302, complete sequence', 837bf2a7), SourmashSignature('NC_011664.1 Shewanella baltica OS223 plasmid pS22301, complete sequence', 87a9aec4), SourmashSignature('NC_009661.1 Shewanella baltica OS185 plasmid pS18501, complete sequence', bde81a41), SourmashSignature('NC_011663.1 Shewanella baltica OS223, complete genome', f033bbd8)]"""


@utils.in_tempdir
def test_sig_cat_4_filelist_with_dbs(c):
# cat using a file list as input
sig47 = utils.get_test_data('47.fa.sig')
sig47abund = utils.get_test_data('track_abund/47.fa.sig')
sbt = utils.get_test_data('v6.sbt.zip')

filelist = c.output("filelist")
with open(filelist, 'w') as f:
f.write("\n".join((sig47, sig47abund, sbt)))

c.run_sourmash('sig', 'cat', filelist,
'-o', 'out.sig')

# stdout should be same signatures
out = c.output('out.sig')

siglist = list(load_signatures(out))
print(len(siglist))
# print("siglist: ",siglist)
# print("\n")
# print("\n")

assert repr(siglist) == """[SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_011665.1 Shewanella baltica OS223 plasmid pS22303, complete sequence', 485c3377), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 57e2b22f), SourmashSignature('NC_011668.1 Shewanella baltica OS223 plasmid pS22302, complete sequence', 837bf2a7), SourmashSignature('NC_011664.1 Shewanella baltica OS223 plasmid pS22301, complete sequence', 87a9aec4), SourmashSignature('NC_009661.1 Shewanella baltica OS185 plasmid pS18501, complete sequence', bde81a41), SourmashSignature('NC_011663.1 Shewanella baltica OS223, complete genome', f033bbd8)]"""
# verify the number of signatures matches what we expect to see based
# on the input files
all_sigs = []
all_sigs += list(load_signatures(sig47))
all_sigs += list(load_signatures(sig47abund))
all_sigs += list(sourmash.load_file_as_signatures(sbt))

assert len(all_sigs) == len(siglist)

# sort the signatures by something deterministic and unique
siglist.sort(key = lambda x: x.md5sum())

assert repr(siglist) == """[SourmashSignature('', 0107d767), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('', 4e94e602), SourmashSignature('', 60f7e23c), SourmashSignature('', 6d6e87e1), SourmashSignature('', b59473c9), SourmashSignature('', f0c834bc), SourmashSignature('', f71e7817)]"""


@utils.in_tempdir
def test_sig_cat_filelist_with_dbs(c):
def test_sig_cat_5_from_file(c):
# cat using a file list as input
sig47 = utils.get_test_data('47.fa.sig')
sig47abund = utils.get_test_data('track_abund/47.fa.sig')
Expand All @@ -833,7 +847,7 @@ def test_sig_cat_filelist_with_dbs(c):
with open(filelist, 'w') as f:
f.write("\n".join((sig47, sig47abund, sbt)))

c.run_sourmash('sig', 'cat', filelist,
c.run_sourmash('sig', 'cat', '--from-file', filelist,
'-o', 'out.sig')

# stdout should be same signatures
Expand Down