Skip to content

Commit

Permalink
[MRG] add sig cat --from-file (#1657)
Browse files Browse the repository at this point in the history
* add sig cat --from-file

* improve output a bit

* add test for sig cat --from-file
  • Loading branch information
ctb authored Jul 13, 2021
1 parent eedf394 commit 45f9cd6
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 27 deletions.
6 changes: 5 additions & 1 deletion src/sourmash/cli/sig/cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@

def subparser(subparsers):
subparser = subparsers.add_parser('cat')
subparser.add_argument('signatures', nargs='+')
subparser.add_argument('signatures', nargs='*')
subparser.add_argument(
'--from-file',
help='a text file containing a list of files to load signatures from'
)
subparser.add_argument(
'-q', '--quiet', action='store_true',
help='suppress non-error output'
Expand Down
5 changes: 5 additions & 0 deletions src/sourmash/sig/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ def cat(args):
save_sigs = sourmash_args.SaveSignaturesToLocation(args.output)
save_sigs.open()

if args.from_file:
more_files = sourmash_args.load_pathlist_from_file(args.from_file)
args.signatures = list(args.signatures)
args.signatures.extend(more_files)

for sigfile in args.signatures:
try:
loader = sourmash_args.load_file_as_signatures(sigfile,
Expand Down
5 changes: 3 additions & 2 deletions src/sourmash/sourmash_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,7 @@ def short_notify(self, msg_template, *args, **kwargs):
notify(msg, end=end)

def notify(self, location):
self.short_notify(f"...reading from file '{location}'", end='\r')
self.short_notify(f"...{self.n_sig} sigs so far. Now reading from file '{location}'", end='\r')

def start_file(self, location, loader):
n_this = 0
Expand All @@ -581,7 +581,8 @@ def start_file(self, location, loader):
finally:
self.n_sig += n_this

self.short_notify(f"loaded {n_this} sigs from '{location}'")
self.short_notify(f"Loaded {n_this} sigs from '{location}'",
end='\r')


#
Expand Down
62 changes: 38 additions & 24 deletions tests/test_cmd_signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -763,31 +763,13 @@ def test_sig_cat_2_out_inplace(c):


@utils.in_tempdir
def test_sig_cat_filelist(c):
def test_sig_cat_3_filelist(c):
# cat using a file list as input
sig47 = utils.get_test_data('47.fa.sig')
# sig47list = list(load_signatures(sig47))
# print("sig47: ",sig47)
# print(type(sig47))
# print("length sig47: ",len(sig47list))
# print("\n")

sig47abund = utils.get_test_data('track_abund/47.fa.sig')
# sig47abundlist = list(load_signatures(sig47abund))
# print("sig47abund: ",sig47abund)
# print(type(sig47abund))
# print("length sig47abund: ",len(sig47abundlist))
# print("\n")

multisig = utils.get_test_data('47+63-multisig.sig')
# multisiglist = list(load_signatures(multisig))
# print("multisig: ",multisig)
# print(type(multisig))
# print("length multisig: ",len(multisiglist))
# print("\n")

filelist = c.output("filelist")

with open(filelist, 'w') as f:
f.write("\n".join((sig47, sig47abund, multisig)))

Expand All @@ -814,16 +796,48 @@ def test_sig_cat_filelist(c):
# sort the signatures by something deterministic and unique
siglist.sort(key = lambda x: x.md5sum())

# print(len(siglist))
assert repr(siglist) == """[SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_011665.1 Shewanella baltica OS223 plasmid pS22303, complete sequence', 485c3377), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 57e2b22f), SourmashSignature('NC_011668.1 Shewanella baltica OS223 plasmid pS22302, complete sequence', 837bf2a7), SourmashSignature('NC_011664.1 Shewanella baltica OS223 plasmid pS22301, complete sequence', 87a9aec4), SourmashSignature('NC_009661.1 Shewanella baltica OS185 plasmid pS18501, complete sequence', bde81a41), SourmashSignature('NC_011663.1 Shewanella baltica OS223, complete genome', f033bbd8)]"""


@utils.in_tempdir
def test_sig_cat_4_filelist_with_dbs(c):
# cat using a file list that mixes signature files and an SBT database
sig47 = utils.get_test_data('47.fa.sig')
sig47abund = utils.get_test_data('track_abund/47.fa.sig')
sbt = utils.get_test_data('v6.sbt.zip')

filelist = c.output("filelist")
with open(filelist, 'w') as f:
f.write("\n".join((sig47, sig47abund, sbt)))

c.run_sourmash('sig', 'cat', filelist,
'-o', 'out.sig')

# the output file (written via -o) should contain the same signatures
out = c.output('out.sig')

siglist = list(load_signatures(out))
print(len(siglist))
# print("siglist: ",siglist)
# print("\n")
# print("\n")

assert repr(siglist) == """[SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_011665.1 Shewanella baltica OS223 plasmid pS22303, complete sequence', 485c3377), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 57e2b22f), SourmashSignature('NC_011668.1 Shewanella baltica OS223 plasmid pS22302, complete sequence', 837bf2a7), SourmashSignature('NC_011664.1 Shewanella baltica OS223 plasmid pS22301, complete sequence', 87a9aec4), SourmashSignature('NC_009661.1 Shewanella baltica OS185 plasmid pS18501, complete sequence', bde81a41), SourmashSignature('NC_011663.1 Shewanella baltica OS223, complete genome', f033bbd8)]"""
# verify the number of signatures matches what we expect to see based
# on the input files
all_sigs = []
all_sigs += list(load_signatures(sig47))
all_sigs += list(load_signatures(sig47abund))
all_sigs += list(sourmash.load_file_as_signatures(sbt))

assert len(all_sigs) == len(siglist)

# sort the signatures by something deterministic and unique
siglist.sort(key = lambda x: x.md5sum())

assert repr(siglist) == """[SourmashSignature('', 0107d767), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('', 4e94e602), SourmashSignature('', 60f7e23c), SourmashSignature('', 6d6e87e1), SourmashSignature('', b59473c9), SourmashSignature('', f0c834bc), SourmashSignature('', f71e7817)]"""


@utils.in_tempdir
def test_sig_cat_filelist_with_dbs(c):
def test_sig_cat_5_from_file(c):
# cat using --from-file to supply the list of input files
sig47 = utils.get_test_data('47.fa.sig')
sig47abund = utils.get_test_data('track_abund/47.fa.sig')
Expand All @@ -833,7 +847,7 @@ def test_sig_cat_filelist_with_dbs(c):
with open(filelist, 'w') as f:
f.write("\n".join((sig47, sig47abund, sbt)))

c.run_sourmash('sig', 'cat', filelist,
c.run_sourmash('sig', 'cat', '--from-file', filelist,
'-o', 'out.sig')

# stdout should be same signatures
Expand Down

0 comments on commit 45f9cd6

Please sign in to comment.