From 74de59ad7dfad889a385e4f4f5b48e25b8c3a9b3 Mon Sep 17 00:00:00 2001
From: "C. Titus Brown" <titus@idyll.org>
Date: Thu, 17 Jun 2021 19:23:39 -0700
Subject: [PATCH] [MRG] add picklists to selector protocol and provide initial
 `Index` support (#1588)

* various cleanups of sourmash_args

* cleanup flakes errors

* clean up sourmash.sig submodule

* initial picklist implementation

* integrate picklists into sourmash sig extract

* basic tests for picklist functionality

* track found etc

* add picklists to selectors

* split pickfile out a little bit

* split column_type out of SignaturePicklist a bit

* picklist tests for .signatures() methods on Index classes

* split pickfile out a little bit

* split column_type out of SignaturePicklist a bit

* test 'Index.find' on picklists for SBTs and LCAs

* factor out picklist checks to 'passes_all_picklists' fn

* update comments, constructor, etc.

* fix tests :)

* more picklist tests

* verify output

* add --picklist-require-all &c

* documentation

* test with --md5 selector

* cover untested code with tests

* trap errors and be nice to users

* remove comment

* fix tests for new SignaturePicklist

* move picklist.py from sourmash.sig into sourmash

* move picklist reporting into sourmash_args

* fix space

* add picklist args throughout, eek.

* add picklists and tests for search, gather, index

* add picklists to prefetch

* add picklists to sourmash compare

* add picklists to lca index

* block multiple picklists on SBTs and LCAs, for now

* add picklist test that checks indexing-and-then-search == index

* add a test for using prefetch CSV as picklist

* remove debugging print

* add docs

* remove order dependence from test

* further attempt to fix test

Co-authored-by: Tessa Pierce Ward <bluegenes@users.noreply.github.com>
---
 doc/command-line.md                           |  99 ++++++----
 src/sourmash/cli/compare.py                   |   4 +-
 src/sourmash/cli/gather.py                    |   9 +-
 src/sourmash/cli/index.py                     |   6 +-
 src/sourmash/cli/lca/index.py                 |   9 +-
 src/sourmash/cli/prefetch.py                  |   4 +-
 src/sourmash/cli/search.py                    |   4 +-
 src/sourmash/cli/sig/extract.py               |  12 +-
 src/sourmash/cli/utils.py                     |  10 ++
 src/sourmash/commands.py                      |  42 ++++-
 src/sourmash/index.py                         |   5 +-
 src/sourmash/lca/command_index.py             |   5 +
 src/sourmash/lca/lca_db.py                    |  17 +-
 src/sourmash/{sig => }/picklist.py            |   9 +-
 src/sourmash/sbt.py                           |  19 +-
 src/sourmash/sig/__main__.py                  |  44 +----
 src/sourmash/sourmash_args.py                 |  47 ++++-
 tests/test-data/gather/all-picklist.csv       |  37 ++++
 tests/test-data/gather/campy-picklist.csv     |   4 +
 .../test-data/gather/salmonella-picklist.csv  |  25 +++
 .../test-data/gather/thermotoga-picklist.csv  |  10 ++
 tests/test_cmd_signature.py                   |   4 +-
 tests/test_index.py                           |  24 +++
 tests/test_lca.py                             | 104 +++++++++++
 tests/test_prefetch.py                        |  24 +++
 tests/test_sbt.py                             |  97 +++++++++-
 tests/test_sourmash.py                        | 169 ++++++++++++++++++
 27 files changed, 729 insertions(+), 114 deletions(-)
 rename src/sourmash/{sig => }/picklist.py (95%)
 create mode 100644 tests/test-data/gather/all-picklist.csv
 create mode 100644 tests/test-data/gather/campy-picklist.csv
 create mode 100644 tests/test-data/gather/salmonella-picklist.csv
 create mode 100644 tests/test-data/gather/thermotoga-picklist.csv

diff --git a/doc/command-line.md b/doc/command-line.md
index 051134b98f..6e7ceda09a 100644
--- a/doc/command-line.md
+++ b/doc/command-line.md
@@ -177,15 +177,14 @@ sourmash compare file1.sig [ file2.sig ... ]
 ```
 
 Options:
-```
---output -- save the distance matrix to this file (as a numpy binary matrix)
---ksize -- do the comparisons at this k-mer size.
---containment -- calculate containment instead of similarity.
-        C(i, j) = size(i intersection j) / size(i).
---from-file -- append the list of files in this text file to the input
+
+* `--output` -- save the distance matrix to this file (as a numpy binary matrix)
+* `--ksize` -- do the comparisons at this k-mer size.
+* `--containment` -- calculate containment instead of similarity; `C(i, j) = size(i intersection j) / size(i)`
+* `--from-file` -- append the list of files in this text file to the input
         signatures.
---ignore-abundance -- ignore abundances in signatures.
-```
+* `--ignore-abundance` -- ignore abundances in signatures.
+* `--picklist` -- select a subset of signatures with [a picklist](#using-picklists-to-subset-large-collections-of-signatures)
 
 **Note:** compare by default produces a symmetric similarity matrix that can be used as an input to clustering. With `--containment`, however, this matrix is no longer symmetric and cannot formally be used for clustering.
 
@@ -249,6 +248,9 @@ similarity   match
 ...    
 ```
 
+Note, as of sourmash 4.2.0, `search` supports `--picklist`, to
+[select a subset of signatures based on a CSV file](#using-picklists-to-subset-large-collections-of-signatures).
+
 ### `sourmash gather` - find metagenome members
 
 The `gather` subcommand selects the best reference genomes to use for
@@ -289,6 +291,9 @@ which matches are no longer reported; by default, this is set to
 50kb. see the Appendix in
 [Classifying Signatures](classifying-signatures.md) for details.
 
+As of sourmash 4.2.0, `gather` supports `--picklist`, to
+[select a subset of signatures based on a CSV file](#using-picklists-to-subset-large-collections-of-signatures).
+
 Note:
 
 Use `sourmash gather` to classify a metagenome against a collection of
@@ -350,6 +355,9 @@ containing a list of file names to index; you can also provide individual
 signature files, directories full of signatures, or other sourmash
 databases.
 
+As of sourmash 4.2.0, `index` supports `--picklist`, to
+[select a subset of signatures based on a CSV file](#using-picklists-to-subset-large-collections-of-signatures).
+
 ### `sourmash prefetch` - select subsets of very large databases for more processing
 
 The `prefetch` subcommand searches a collection of scaled signatures
@@ -375,6 +383,7 @@ Other options include:
 * `--threshold-bp` to require a minimum estimated bp overlap for output;
 * `--scaled` for downsampling;
 * `--force` to continue past survivable errors;
+* `--picklist` select a subset of signatures with [a picklist](#using-picklists-to-subset-large-collections-of-signatures)
 
 ### Alternative search mode for low-memory (but slow) search: `--linear`
 
@@ -589,6 +598,9 @@ see
 You can use `--from-file` to pass `lca index` a text file containing a
 list of file names to index.
 
+As of sourmash 4.2.0, `lca index` supports `--picklist`, to
+[select a subset of signatures based on a CSV file](#using-picklists-to-subset-large-collections-of-signatures).
+
 ### `sourmash lca rankinfo` - examine an LCA database
 
 The `sourmash lca rankinfo` command displays k-mer specificity
@@ -821,36 +833,8 @@ will extract the same signature, which has an accession number of
 #### Using picklists with `sourmash sig extract`
 
 As of sourmash 4.2.0, `extract` also supports picklists, a feature by
-which you can select signatures based on values in a CSV file.
-
-For example,
-```
-sourmash sig extract --picklist list.csv:md5:md5sum <signatures>
-```
-will extract only the signatures that have md5sums matching the
-column `md5sum` in the CSV file `list.csv`.
-
-The `--picklist` argument string must be of the format
-`pickfile:colname:coltype`, where `pickfile` is the path to a CSV
-file, `colname` is the name of the column to select from the CSV
-file (based on the headers in the first line of the CSV file),
-and `coltype` is the type of match.
-
-The following `coltype`s are currently supported by `sourmash sig extract`:
-
-* `name` - exact match to signature's name
-* `md5` - exact match to signature's md5sum
-* `md5prefix8` - match to 8-character prefix of signature's md5sum
-* `md5short` - same as `md5prefix8`
-* `ident` - exact match to signature's identifier
-* `identprefix` - match to signature's identifier, before '.'
-
-Identifiers are constructed by using the first space delimited word in
-the signature name.
-
-One way to build a picklist is to use `sourmash sig describe --csv
-out.csv <signatures>` to construct an initial CSV file that you can
-then edit further.
+which you can select signatures based on values in a CSV file. See
+[Using picklists to subset large collections of signatures](#using-picklists-to-subset-large-collections-of-signatures), below.
 
 ### `sourmash signature flatten` - remove abundance information from signatures
 
@@ -963,6 +947,45 @@ signatures with multiple ksizes or moltypes at the same time; you need
 to pick the ksize and moltype to use for your search. Where possible,
 scaled values will be made compatible.
 
+### Using picklists to subset large collections of signatures
+
+As of sourmash 4.2.0, many commands support *picklists*, a feature by
+which you can select or "pick out" signatures based on values in a CSV
+file.
+
+For example,
+```
+sourmash sig extract --picklist list.csv:md5:md5sum <signatures>
+```
+will extract only the signatures that have md5sums matching the
+column `md5sum` in the CSV file `list.csv`.
+
+The `--picklist` argument string must be of the format
+`pickfile:colname:coltype`, where `pickfile` is the path to a CSV
+file, `colname` is the name of the column to select from the CSV
+file (based on the headers in the first line of the CSV file),
+and `coltype` is the type of match.
+
+The following `coltype`s are currently supported by `sourmash sig extract`:
+
+* `name` - exact match to signature's name
+* `md5` - exact match to signature's md5sum
+* `md5prefix8` - match to 8-character prefix of signature's md5sum
+* `md5short` - same as `md5prefix8`
+* `ident` - exact match to signature's identifier
+* `identprefix` - match to signature's identifier, before '.'
+
+Identifiers are constructed by using the first space delimited word in
+the signature name.
+
+One way to build a picklist is to use `sourmash sig describe --csv
+out.csv <signatures>` to construct an initial CSV file that you can
+then edit further.
+
+In addition to `sig extract`, the following commands support
+`--picklist` selection: `index`, `search`, `gather`, `prefetch`,
+`compare`, `index`, and `lca index`.
+
 ### Storing (and searching) signatures
   
 Backing up a little, there are many ways to store and search
diff --git a/src/sourmash/cli/compare.py b/src/sourmash/cli/compare.py
index dcec015bd5..f7387f68b2 100644
--- a/src/sourmash/cli/compare.py
+++ b/src/sourmash/cli/compare.py
@@ -1,6 +1,7 @@
 """compare sequence signatures made by compute"""
 
-from sourmash.cli.utils import add_ksize_arg, add_moltype_args
+from sourmash.cli.utils import (add_ksize_arg, add_moltype_args,
+                                add_picklist_args)
 
 
 def subparser(subparsers):
@@ -47,6 +48,7 @@ def subparser(subparsers):
     subparser.add_argument(
         '-p', '--processes', metavar='N', type=int, default=None,
         help='Number of processes to use to calculate similarity')
+    add_picklist_args(subparser)
 
 
 def main(args):
diff --git a/src/sourmash/cli/gather.py b/src/sourmash/cli/gather.py
index 3d2e6d1a24..6e0addd427 100644
--- a/src/sourmash/cli/gather.py
+++ b/src/sourmash/cli/gather.py
@@ -1,6 +1,7 @@
 """search a metagenome signature against dbs"""
 
-from sourmash.cli.utils import add_ksize_arg, add_moltype_args
+from sourmash.cli.utils import (add_ksize_arg, add_moltype_args,
+                                add_picklist_args)
 
 
 def subparser(subparsers):
@@ -60,8 +61,6 @@ def subparser(subparsers):
         '--cache-size', default=0, type=int, metavar='N',
         help='number of internal SBT nodes to cache in memory (default: 0, cache all nodes)'
     )
-    add_ksize_arg(subparser, 31)
-    add_moltype_args(subparser)
 
     # advanced parameters
     subparser.add_argument(
@@ -80,6 +79,10 @@ def subparser(subparsers):
         help="use prefetch before gather; see documentation",
     )
 
+    add_ksize_arg(subparser, 31)
+    add_moltype_args(subparser)
+    add_picklist_args(subparser)
+
 
 def main(args):
     import sourmash
diff --git a/src/sourmash/cli/index.py b/src/sourmash/cli/index.py
index 1be7f06690..334b394bfe 100644
--- a/src/sourmash/cli/index.py
+++ b/src/sourmash/cli/index.py
@@ -25,7 +25,8 @@
 ---
 """
 
-from sourmash.cli.utils import add_moltype_args, add_ksize_arg
+from sourmash.cli.utils import (add_ksize_arg, add_moltype_args,
+                                add_picklist_args)
 
 
 def subparser(subparsers):
@@ -44,7 +45,6 @@ def subparser(subparsers):
         '-q', '--quiet', action='store_true',
         help='suppress non-error output'
     )
-    add_ksize_arg(subparser, 31)
     subparser.add_argument(
         '-d', '--n_children', metavar='D', type=int, default=2,
         help='number of children for internal nodes; default=2'
@@ -70,7 +70,9 @@ def subparser(subparsers):
         '--scaled', metavar='FLOAT', type=float, default=0,
         help='downsample signatures to the specified scaled factor'
     )
+    add_ksize_arg(subparser, 31)
     add_moltype_args(subparser)
+    add_picklist_args(subparser)
 
 
 def main(args):
diff --git a/src/sourmash/cli/lca/index.py b/src/sourmash/cli/lca/index.py
index 581ff63dcd..09bc7f75fb 100644
--- a/src/sourmash/cli/lca/index.py
+++ b/src/sourmash/cli/lca/index.py
@@ -1,6 +1,7 @@
 """create LCA database"""
 
-from sourmash.cli.utils import add_ksize_arg, add_moltype_args
+from sourmash.cli.utils import (add_ksize_arg, add_moltype_args,
+                                add_picklist_args)
 
 
 def subparser(subparsers):
@@ -18,8 +19,6 @@ def subparser(subparsers):
     subparser.add_argument(
         '--scaled', metavar='S', default=10000, type=float
     )
-    add_ksize_arg(subparser, 31)
-    add_moltype_args(subparser)
     subparser.add_argument(
         '-q', '--quiet', action='store_true',
         help='suppress non-error output'
@@ -53,6 +52,10 @@ def subparser(subparsers):
         help='ignore signatures with no taxonomy entry'
     )
 
+    add_ksize_arg(subparser, 31)
+    add_moltype_args(subparser)
+    add_picklist_args(subparser)
+
 
 def main(args):
     import sourmash
diff --git a/src/sourmash/cli/prefetch.py b/src/sourmash/cli/prefetch.py
index 27a254c68e..e04c537193 100644
--- a/src/sourmash/cli/prefetch.py
+++ b/src/sourmash/cli/prefetch.py
@@ -1,6 +1,7 @@
 """search a signature against dbs, find all overlaps"""
 
-from sourmash.cli.utils import add_ksize_arg, add_moltype_args
+from sourmash.cli.utils import (add_ksize_arg, add_moltype_args,
+                                add_picklist_args)
 
 
 def subparser(subparsers):
@@ -63,6 +64,7 @@ def subparser(subparsers):
     )
     add_ksize_arg(subparser, 31)
     add_moltype_args(subparser)
+    add_picklist_args(subparser)
 
 
 def main(args):
diff --git a/src/sourmash/cli/search.py b/src/sourmash/cli/search.py
index 9ff4ab9985..c4e1d41323 100644
--- a/src/sourmash/cli/search.py
+++ b/src/sourmash/cli/search.py
@@ -1,6 +1,7 @@
 """search a signature against other signatures"""
 
-from sourmash.cli.utils import add_ksize_arg, add_moltype_args
+from sourmash.cli.utils import (add_ksize_arg, add_moltype_args,
+                                add_picklist_args)
 
 
 def subparser(subparsers):
@@ -59,6 +60,7 @@ def subparser(subparsers):
     )
     add_ksize_arg(subparser, 31)
     add_moltype_args(subparser)
+    add_picklist_args(subparser)
 
 
 def main(args):
diff --git a/src/sourmash/cli/sig/extract.py b/src/sourmash/cli/sig/extract.py
index 81bc788a93..9ea71eb229 100644
--- a/src/sourmash/cli/sig/extract.py
+++ b/src/sourmash/cli/sig/extract.py
@@ -2,7 +2,8 @@
 
 import sys
 
-from sourmash.cli.utils import add_moltype_args, add_ksize_arg
+from sourmash.cli.utils import (add_moltype_args, add_ksize_arg,
+                                add_picklist_args)
 
 
 def subparser(subparsers):
@@ -25,16 +26,9 @@ def subparser(subparsers):
         '--name', default=None,
         help='select signatures whose name contains this substring'
     )
-    subparser.add_argument(
-        '--picklist', default=None,
-        help="select signatures based on a picklist, i.e. 'file.csv:colname:coltype'"
-    )
-    subparser.add_argument(
-        '--picklist-require-all', default=False, action='store_true',
-        help="require that all picklist values be found or else fail"
-    )
     add_ksize_arg(subparser, 31)
     add_moltype_args(subparser)
+    add_picklist_args(subparser)
 
 
 def main(args):
diff --git a/src/sourmash/cli/utils.py b/src/sourmash/cli/utils.py
index 4bb918643a..1409ed6225 100644
--- a/src/sourmash/cli/utils.py
+++ b/src/sourmash/cli/utils.py
@@ -50,6 +50,16 @@ def add_ksize_arg(parser, default=31):
         help='k-mer size; default={d}'.format(d=default)
     )
 
+def add_picklist_args(parser):
+    parser.add_argument(
+        '--picklist', default=None,
+        help="select signatures based on a picklist, i.e. 'file.csv:colname:coltype'"
+    )
+    parser.add_argument(
+        '--picklist-require-all', default=False, action='store_true',
+        help="require that all picklist values be found or else fail"
+    )
+
 
 def opfilter(path):
     return not path.startswith('__') and path not in ['utils']
diff --git a/src/sourmash/commands.py b/src/sourmash/commands.py
index 0d8a9f2266..ec18dfd0e9 100644
--- a/src/sourmash/commands.py
+++ b/src/sourmash/commands.py
@@ -28,6 +28,7 @@ def compare(args):
 
     set_quiet(args.quiet)
     moltype = sourmash_args.calculate_moltype(args)
+    picklist = sourmash_args.load_picklist(args)
 
     inp_files = list(args.signatures)
     if args.from_file:
@@ -45,11 +46,12 @@ def compare(args):
         loaded = sourmash_args.load_file_as_signatures(filename,
                                                        ksize=args.ksize,
                                                        select_moltype=moltype,
+                                                       picklist=picklist,
                                                        yield_all_files=args.force,
                                                        progress=progress)
         loaded = list(loaded)
         if not loaded:
-            notify('\nwarning: no signatures loaded at given ksize/molecule type from {}', filename)
+            notify('\nwarning: no signatures loaded at given ksize/molecule type/picklist from {}', filename)
         siglist.extend(loaded)
 
         # track ksizes/moltypes
@@ -79,6 +81,9 @@ def compare(args):
     notify(' '*79, end='\r')
     notify('loaded {} signatures total.'.format(len(siglist)))
 
+    if picklist:
+        sourmash_args.report_picklist(args, picklist)
+
     # check to make sure they're potentially compatible - either using
     # scaled, or not.
     scaled_sigs = [s.minhash.scaled for s in siglist]
@@ -336,6 +341,7 @@ def index(args):
     """
     set_quiet(args.quiet)
     moltype = sourmash_args.calculate_moltype(args)
+    picklist = sourmash_args.load_picklist(args)
 
     if args.append:
         tree = load_sbt_index(args.sbt_name)
@@ -372,6 +378,7 @@ def index(args):
                                                         ksize=args.ksize,
                                                         select_moltype=moltype,
                                                         yield_all_files=args.force,
+                                                        picklist=picklist,
                                                         progress=progress)
 
         # load all matching signatures in this file
@@ -417,6 +424,9 @@ def index(args):
         error('no signatures found to load into tree!? failing.')
         sys.exit(-1)
 
+    if picklist:
+        sourmash_args.report_picklist(args, picklist)
+
     notify('loaded {} sigs; saving SBT under "{}"', n, args.sbt_name)
     tree.save(args.sbt_name, sparseness=args.sparseness)
     if tree.storage:
@@ -429,6 +439,7 @@ def search(args):
 
     set_quiet(args.quiet)
     moltype = sourmash_args.calculate_moltype(args)
+    picklist = sourmash_args.load_picklist(args)
 
     # set up the query.
     query = sourmash_args.load_query_signature(args.query,
@@ -458,7 +469,8 @@ def search(args):
             sys.exit(-1)
 
     databases = sourmash_args.load_dbs_and_sigs(args.databases, query,
-                                                not is_containment)
+                                                not is_containment,
+                                                picklist=picklist)
 
     if not len(databases):
         error('Nothing found to search!')
@@ -531,6 +543,9 @@ def search(args):
             for sr in results:
                 save_sig.add(sr.match)
 
+    if picklist:
+        sourmash_args.report_picklist(args, picklist)
+
 
 def categorize(args):
     "Use a database to find the best match to many signatures."
@@ -615,6 +630,7 @@ def gather(args):
 
     set_quiet(args.quiet, args.debug)
     moltype = sourmash_args.calculate_moltype(args)
+    picklist = sourmash_args.load_picklist(args)
 
     # load the query signature & figure out all the things
     query = sourmash_args.load_query_signature(args.query,
@@ -646,7 +662,8 @@ def gather(args):
     if args.cache_size == 0:
         cache_size = None
     databases = sourmash_args.load_dbs_and_sigs(args.databases, query, False,
-                                                cache_size=cache_size)
+                                                cache_size=cache_size,
+                                                picklist=picklist)
 
     if not len(databases):
         error('Nothing found to search!')
@@ -664,7 +681,12 @@ def gather(args):
 
         counters = []
         for db in databases:
-            counter = db.counter_gather(prefetch_query, args.threshold_bp)
+            try:
+                counter = db.counter_gather(prefetch_query, args.threshold_bp)
+            except ValueError:
+                if picklist:
+                    # catch "no signatures to search" ValueError...
+                    continue
             save_prefetch.add_many(counter.siglist)
             counters.append(counter)
 
@@ -769,6 +791,10 @@ def gather(args):
 
             with FileOutput(args.output_unassigned, 'wt') as fp:
                 sig.save_signatures([ next_query ], fp)
+
+    if picklist:
+        sourmash_args.report_picklist(args, picklist)
+
     # DONE w/gather function.
 
 
@@ -1052,6 +1078,7 @@ def prefetch(args):
     # figure out what k-mer size and molecule type we're looking for here
     ksize = args.ksize
     moltype = sourmash_args.calculate_moltype(args)
+    picklist = sourmash_args.load_picklist(args)
 
     # load the query signature & figure out all the things
     query = sourmash_args.load_query_signature(args.query,
@@ -1120,7 +1147,8 @@ def prefetch(args):
             db = LazyLinearIndex(db)
 
         db = db.select(ksize=ksize, moltype=moltype,
-                       containment=True, scaled=True)
+                       containment=True, scaled=True,
+                       picklist=picklist)
 
         if not db:
             notify(f"...no compatible signatures in '{dbfilename}'; skipping")
@@ -1185,5 +1213,7 @@ def prefetch(args):
         with open(filename, "wt") as fp:
             sig.save_signatures([ss], fp)
 
+    if picklist:
+        sourmash_args.report_picklist(args, picklist)
+
     return 0
-    
diff --git a/src/sourmash/index.py b/src/sourmash/index.py
index 2aec59283c..b344a3cabc 100644
--- a/src/sourmash/index.py
+++ b/src/sourmash/index.py
@@ -291,7 +291,7 @@ def select(self, ksize=None, moltype=None, scaled=None, num=None,
 
 
 def select_signature(ss, ksize=None, moltype=None, scaled=0, num=0,
-                     containment=False):
+                     containment=False, picklist=None):
     "Check that the given signature matches the specificed requirements."
     # ksize match?
     if ksize and ksize != ss.minhash.ksize:
@@ -318,6 +318,9 @@ def select_signature(ss, ksize=None, moltype=None, scaled=0, num=0,
         if ss.minhash.scaled or num != ss.minhash.num:
             return False
 
+    if picklist is not None and ss not in picklist:
+        return False
+
     return True
 
 
diff --git a/src/sourmash/lca/command_index.py b/src/sourmash/lca/command_index.py
index 883d861a75..8f7aace1f4 100644
--- a/src/sourmash/lca/command_index.py
+++ b/src/sourmash/lca/command_index.py
@@ -145,6 +145,7 @@ def index(args):
         args.ksize = DEFAULT_LOAD_K
 
     moltype = sourmash_args.calculate_moltype(args, default='DNA')
+    picklist = sourmash_args.load_picklist(args)
 
     notify('Building LCA database with ksize={} scaled={} moltype={}.',
            args.ksize, args.scaled, moltype)
@@ -190,6 +191,7 @@ def index(args):
         n += 1
         it = load_file_as_signatures(filename, ksize=args.ksize,
                                      select_moltype=moltype,
+                                     picklist=picklist,
                                      yield_all_files=args.force)
         for sig in it:
             notify(u'\r\033[K', end=u'')
@@ -265,6 +267,9 @@ def index(args):
     notify('loaded {} hashes at ksize={} scaled={}', len(db.hashval_to_idx),
            args.ksize, args.scaled)
 
+    if picklist:
+        sourmash_args.report_picklist(args, picklist)
+
     # summarize:
     notify('{} assigned lineages out of {} distinct lineages in spreadsheet.',
            len(record_used_lineages), len(set(assignments.values())))
diff --git a/src/sourmash/lca/lca_db.py b/src/sourmash/lca/lca_db.py
index a3d90ffd5d..19cfdfb11d 100644
--- a/src/sourmash/lca/lca_db.py
+++ b/src/sourmash/lca/lca_db.py
@@ -9,6 +9,7 @@
 from sourmash.minhash import _get_max_hash_for_scaled
 from sourmash.logging import notify, error, debug
 from sourmash.index import Index, IndexSearchResult
+from sourmash.picklist import passes_all_picklists
 
 
 def cached_property(fun):
@@ -71,6 +72,7 @@ def __init__(self, ksize, scaled, moltype='DNA'):
         self.lineage_to_lid = {}
         self.lid_to_lineage = {}
         self.hashval_to_idx = defaultdict(set)
+        self.picklists = []
 
     @property
     def location(self):
@@ -176,7 +178,7 @@ def signatures(self):
             yield v
 
     def select(self, ksize=None, moltype=None, num=0, scaled=0,
-               containment=False):
+               containment=False, picklist=None):
         """Make sure this database matches the requested requirements.
 
         As with SBTs, queries with higher scaled values than the database
@@ -197,6 +199,11 @@ def select(self, ksize=None, moltype=None, num=0, scaled=0,
         if moltype is not None and moltype != self.moltype:
             raise ValueError(f"moltype on this database is {self.moltype}; this is different from requested moltype of {moltype}")
 
+        if picklist is not None:
+            self.picklists.append(picklist)
+            if len(self.picklists) > 1:
+                raise ValueError("we do not (yet) support multiple picklists for LCA databases")
+
         return self
 
     @classmethod
@@ -416,7 +423,10 @@ def _signatures(self):
         for idx, mh in mhd.items():
             ident = self.idx_to_ident[idx]
             name = self.ident_to_name[ident]
-            sigd[idx] = SourmashSignature(mh, name=name)
+            ss = SourmashSignature(mh, name=name)
+
+            if passes_all_picklists(ss, self.picklists):
+                sigd[idx] = SourmashSignature(mh, name=name)
 
         debug('=> {} signatures!', len(sigd))
         return sigd
@@ -478,7 +488,8 @@ def find(self, search_fn, query, **kwargs):
             # signal that it is done, or something.
             if search_fn.passes(score):
                 if search_fn.collect(score, subj):
-                    yield IndexSearchResult(score, subj, self.location)
+                    if passes_all_picklists(subj, self.picklists):
+                        yield IndexSearchResult(score, subj, self.location)
 
     @cached_property
     def lid_to_idx(self):
diff --git a/src/sourmash/sig/picklist.py b/src/sourmash/picklist.py
similarity index 95%
rename from src/sourmash/sig/picklist.py
rename to src/sourmash/picklist.py
index 4d07f309dc..11526f135b 100644
--- a/src/sourmash/sig/picklist.py
+++ b/src/sourmash/picklist.py
@@ -23,7 +23,6 @@ class SignaturePicklist:
     Initialize using ``SignaturePicklist.from_picklist_args(argstr)``,
     which takes an argument str like so: 'pickfile:column:coltype'.
 
-    # CTB pickfile or pickset?
     Here, 'pickfile' is the path to a CSV file; 'column' is the name of
     the column to select from the CSV file; and 'coltype' is the type of
     matching to do on that column.
@@ -142,3 +141,11 @@ def filter(self, it):
         for ss in it:
             if self.__contains__(ss):
                 yield ss
+
+
+def passes_all_picklists(ss, picklists):
+    "does the signature 'ss' pass all of the picklists?"
+    for picklist in picklists:
+        if ss not in picklist:
+            return False
+    return True
diff --git a/src/sourmash/sbt.py b/src/sourmash/sbt.py
index c31a689621..5206af1229 100644
--- a/src/sourmash/sbt.py
+++ b/src/sourmash/sbt.py
@@ -20,6 +20,7 @@
 from .sbt_storage import FSStorage, IPFSStorage, RedisStorage, ZipStorage
 from .logging import error, notify, debug
 from .index import Index, IndexSearchResult
+from .picklist import passes_all_picklists
 
 from .nodegraph import Nodegraph, extract_nodegraph_info, calc_expected_collisions
 
@@ -148,6 +149,7 @@ def __init__(self, factory, *, d=2, storage=None, cache_size=None):
             cache_size = sys.maxsize
         self._nodescache = _NodesCache(maxsize=cache_size)
         self._location = None
+        self.picklists = []
 
     @property
     def location(self):
@@ -155,10 +157,12 @@ def location(self):
 
     def signatures(self):
         for k in self.leaves():
-            yield k.data
+            ss = k.data
+            if passes_all_picklists(ss, self.picklists):
+                yield ss
 
     def select(self, ksize=None, moltype=None, num=0, scaled=0,
-               containment=False):
+               containment=False, picklist=None):
         """Make sure this database matches the requested requirements.
 
         Will always raise ValueError if a requirement cannot be met.
@@ -210,6 +214,11 @@ def select(self, ksize=None, moltype=None, num=0, scaled=0,
             if scaled > db_mh.scaled and not containment:
                 raise ValueError(f"search scaled value {scaled} is less than database scaled value of {db_mh.scaled}")
 
+        if picklist is not None:
+            self.picklists.append(picklist)
+            if len(self.picklists) > 1:
+                raise ValueError("we do not (yet) support multiple picklists for SBTs")
+
         return self
 
     def new_node_pos(self, node):
@@ -450,7 +459,11 @@ def node_search(node, *args, **kwargs):
 
         # & execute!
         for n in self._find_nodes(node_search, **kwargs):
-            yield IndexSearchResult(results[n.data], n.data, self.location)
+            ss = n.data
+
+            # filter on picklists
+            if passes_all_picklists(ss, self.picklists):
+                yield IndexSearchResult(results[ss], ss, self.location)
 
     def _rebuild_node(self, pos=0):
         """Recursively rebuilds an internal node (if it is not present).
diff --git a/src/sourmash/sig/__main__.py b/src/sourmash/sig/__main__.py
index 90ffa78951..e15cd9840d 100644
--- a/src/sourmash/sig/__main__.py
+++ b/src/sourmash/sig/__main__.py
@@ -13,7 +13,7 @@
 from sourmash.logging import set_quiet, error, notify, print_results, debug
 from sourmash import sourmash_args
 from sourmash.minhash import _get_max_hash_for_scaled
-from .picklist import SignaturePicklist
+from sourmash.picklist import SignaturePicklist
 
 usage='''
 sourmash signature <command> [<args>] - manipulate/work with signature files.
@@ -542,35 +542,12 @@ def extract(args):
     """
     set_quiet(args.quiet)
     moltype = sourmash_args.calculate_moltype(args)
-
-    picklist = None
-    if args.picklist:
-        try:
-            picklist = SignaturePicklist.from_picklist_args(args.picklist)
-        except ValueError as exc:
-            error("ERROR: could not load picklist.")
-            error(str(exc))
-            sys.exit(-1)
-
-        notify(f"picking column '{picklist.column_name}' of type '{picklist.coltype}' from '{picklist.pickfile}'")
-
-        n_empty_val, dup_vals = picklist.load(picklist.pickfile, picklist.column_name)
-
-        notify(f"loaded {len(picklist.pickset)} distinct values into picklist.")
-        if n_empty_val:
-            notify(f"WARNING: {n_empty_val} empty values in column '{picklist.column_name}' in picklist file")
-        if dup_vals:
-            notify(f"WARNING: {len(dup_vals)} values in picklist column '{picklist.column_name}' were not distinct")
-        picklist_filter_fn = picklist.filter
-    else:
-        def picklist_filter_fn(it):
-            for ss in it:
-                yield ss
+    picklist = sourmash_args.load_picklist(args)
 
     # further filtering on md5 or name?
     if args.md5 is not None or args.name is not None:
         def filter_fn(it):
-            for ss in picklist_filter_fn(it):
+            for ss in it:
                 # match?
                 keep = False
                 if args.name and args.name in str(ss):
@@ -581,8 +558,10 @@ def filter_fn(it):
                 if keep:
                     yield ss
     else:
-        # whatever comes out of the picklist is fine
-        filter_fn = picklist_filter_fn
+        # whatever comes out of the database is fine
+        def filter_fn(it):
+            for ss in it:
+                yield ss
 
     # ok! filtering defined, let's go forward
     progress = sourmash_args.SignatureLoadingProgress()
@@ -594,6 +573,7 @@ def filter_fn(it):
         siglist = sourmash_args.load_file_as_signatures(filename,
                                                         ksize=args.ksize,
                                                         select_moltype=moltype,
+                                                        picklist=picklist,
                                                         progress=progress)
         for ss in filter_fn(siglist):
             save_sigs.add(ss)
@@ -608,13 +588,7 @@ def filter_fn(it):
     notify("extracted {} signatures from {} file(s)", len(save_sigs),
            len(args.signatures))
     if picklist:
-        notify(f"for given picklist, found {len(picklist.found)} matches to {len(picklist.pickset)} distinct values")
-        n_missing = len(picklist.pickset - picklist.found)
-        if n_missing:
-            notify(f"WARNING: {n_missing} missing picklist values.")
-            if args.picklist_require_all:
-                error("ERROR: failing because --picklist-require-all was set")
-                sys.exit(-1)
+        sourmash_args.report_picklist(args, picklist)
 
 
 def filter(args):
diff --git a/src/sourmash/sourmash_args.py b/src/sourmash/sourmash_args.py
index 4085fa5bea..60850469c3 100644
--- a/src/sourmash/sourmash_args.py
+++ b/src/sourmash/sourmash_args.py
@@ -19,6 +19,8 @@
 
 from .index import (LinearIndex, ZipFileLinearIndex, MultiIndex)
 from . import signature as sigmod
+from .picklist import SignaturePicklist
+
 
 DEFAULT_LOAD_K = 31
 
@@ -57,6 +59,40 @@ def calculate_moltype(args, default=None):
     return moltype
 
 
+def load_picklist(args):
+    "Load a SignaturePicklist from --picklist arguments."
+    picklist = None
+    if args.picklist:
+        try:
+            picklist = SignaturePicklist.from_picklist_args(args.picklist)
+        except ValueError as exc:
+            error("ERROR: could not load picklist.")
+            error(str(exc))
+            sys.exit(-1)
+
+        notify(f"picking column '{picklist.column_name}' of type '{picklist.coltype}' from '{picklist.pickfile}'")
+
+        n_empty_val, dup_vals = picklist.load(picklist.pickfile, picklist.column_name)
+
+        notify(f"loaded {len(picklist.pickset)} distinct values into picklist.")
+        if n_empty_val:
+            notify(f"WARNING: {n_empty_val} empty values in column '{picklist.column_name}' in picklist file")
+        if dup_vals:
+            notify(f"WARNING: {len(dup_vals)} values in picklist column '{picklist.column_name}' were not distinct")
+
+    return picklist
+
+
+def report_picklist(args, picklist):
+    notify(f"for given picklist, found {len(picklist.found)} matches to {len(picklist.pickset)} distinct values")
+    n_missing = len(picklist.pickset - picklist.found)
+    if n_missing:
+        notify(f"WARNING: {n_missing} missing picklist values.")
+        if args.picklist_require_all:
+            error("ERROR: failing because --picklist-require-all was set")
+            sys.exit(-1)
+
+
 def load_query_signature(filename, ksize, select_moltype, select_md5=None):
     """Load a single signature to use as a query.
 
@@ -137,7 +173,8 @@ def traverse_find_sigs(filenames, yield_all_files=False):
                         yield fullname
 
 
-def load_dbs_and_sigs(filenames, query, is_similarity_query, *, cache_size=None):
+def load_dbs_and_sigs(filenames, query, is_similarity_query, *,
+                      cache_size=None, picklist=None):
     """
     Load one or more SBTs, LCAs, and/or collections of signatures.
 
@@ -179,6 +216,9 @@ def load_dbs_and_sigs(filenames, query, is_similarity_query, *, cache_size=None)
             notify(f"no compatible signatures found in '{filename}'")
             sys.exit(-1)
 
+        if picklist:
+            db = db.select(picklist=picklist)
+
         databases.append(db)
 
     # calc num loaded info.
@@ -351,6 +391,7 @@ def load_file_as_index(filename, *, yield_all_files=False):
 
 
 def load_file_as_signatures(filename, *, select_moltype=None, ksize=None,
+                            picklist=None,
                             yield_all_files=False,
                             progress=None):
     """Load 'filename' as a collection of signatures. Return an iterable.
@@ -367,13 +408,13 @@ def load_file_as_signatures(filename, *, select_moltype=None, ksize=None,
     underneath this directory into a list of signatures. If
     yield_all_files=True, will attempt to load all files.
 
-    Applies selector function if select_moltype and/or ksize are given.
+    Applies selector function if select_moltype, ksize or picklist are given.
     """
     if progress:
         progress.notify(filename)
 
     db = _load_database(filename, yield_all_files)
-    db = db.select(moltype=select_moltype, ksize=ksize)
+    db = db.select(moltype=select_moltype, ksize=ksize, picklist=picklist)
     loader = db.signatures()
 
     if progress is not None:
diff --git a/tests/test-data/gather/all-picklist.csv b/tests/test-data/gather/all-picklist.csv
new file mode 100644
index 0000000000..b9ebbaa522
--- /dev/null
+++ b/tests/test-data/gather/all-picklist.csv
@@ -0,0 +1,37 @@
+signature_file,md5,ksize,moltype,num,scaled,n_hashes,seed,with_abundance,name,filename,license
+GCF_000006945.2_ASM694v2_genomic.fna.gz.sig,323c1a1712b0949268dd6fb93be63ae2,11,DNA,0,10000,150,42,0,"NC_003197.2 Salmonella enterica subsp. enterica serovar Typhimurium str. LT2, complete genome",../fasta/GCF_000006945.2_ASM694v2_genomic.fna.gz,CC0
+GCF_000006945.2_ASM694v2_genomic.fna.gz.sig,263c2de20b597d6e33b81ec91d8672b5,21,DNA,0,10000,485,42,0,"NC_003197.2 Salmonella enterica subsp. enterica serovar Typhimurium str. LT2, complete genome",../fasta/GCF_000006945.2_ASM694v2_genomic.fna.gz,CC0
+GCF_000006945.2_ASM694v2_genomic.fna.gz.sig,dc12a6d8fd63122aa68f78facf9bed94,31,DNA,0,10000,490,42,0,"NC_003197.2 Salmonella enterica subsp. enterica serovar Typhimurium str. LT2, complete genome",../fasta/GCF_000006945.2_ASM694v2_genomic.fna.gz,CC0
+GCF_000007545.1_ASM754v1_genomic.fna.gz.sig,df24140b1c6cad16b30abeaf03019eb5,11,DNA,0,10000,158,42,0,"NC_004631.1 Salmonella enterica subsp. enterica serovar Typhi Ty2, complete genome",../fasta/GCF_000007545.1_ASM754v1_genomic.fna.gz,CC0
+GCF_000007545.1_ASM754v1_genomic.fna.gz.sig,fd958e3b5649bc03890517ff239970ea,21,DNA,0,10000,445,42,0,"NC_004631.1 Salmonella enterica subsp. enterica serovar Typhi Ty2, complete genome",../fasta/GCF_000007545.1_ASM754v1_genomic.fna.gz,CC0
+GCF_000007545.1_ASM754v1_genomic.fna.gz.sig,8c22dff88a2239607762da00f7fd1725,31,DNA,0,10000,472,42,0,"NC_004631.1 Salmonella enterica subsp. enterica serovar Typhi Ty2, complete genome",../fasta/GCF_000007545.1_ASM754v1_genomic.fna.gz,CC0
+GCF_000008105.1_ASM810v1_genomic.fna.gz.sig,9db6efc92a041e11713ccfa8597edae5,11,DNA,0,10000,150,42,0,"NC_006905.1 Salmonella enterica subsp. enterica serovar Choleraesuis str. SC-B67, complete genome",../fasta/GCF_000008105.1_ASM810v1_genomic.fna.gz,CC0
+GCF_000008105.1_ASM810v1_genomic.fna.gz.sig,8996699a05d3e5a05fa3fe94bfa41431,21,DNA,0,10000,472,42,0,"NC_006905.1 Salmonella enterica subsp. enterica serovar Choleraesuis str. SC-B67, complete genome",../fasta/GCF_000008105.1_ASM810v1_genomic.fna.gz,CC0
+GCF_000008105.1_ASM810v1_genomic.fna.gz.sig,85c3aeec6457c0b1d210472ddeb67714,31,DNA,0,10000,468,42,0,"NC_006905.1 Salmonella enterica subsp. enterica serovar Choleraesuis str. SC-B67, complete genome",../fasta/GCF_000008105.1_ASM810v1_genomic.fna.gz,CC0
+GCF_000008545.1_ASM854v1_genomic.fna.gz.sig,74b928d3db1f7f033c0dcca6c6e52aea,11,DNA,0,10000,84,42,0,"NC_000853.1 Thermotoga maritima MSB8 chromosome, complete genome",../fasta/GCF_000008545.1_ASM854v1_genomic.fna.gz,CC0
+GCF_000008545.1_ASM854v1_genomic.fna.gz.sig,ba9947e078cab29e20bc7d31bc1b9f0d,21,DNA,0,10000,192,42,0,"NC_000853.1 Thermotoga maritima MSB8 chromosome, complete genome",../fasta/GCF_000008545.1_ASM854v1_genomic.fna.gz,CC0
+GCF_000008545.1_ASM854v1_genomic.fna.gz.sig,1bfe96d76ec9cdb60779a1a9223c424e,31,DNA,0,10000,187,42,0,"NC_000853.1 Thermotoga maritima MSB8 chromosome, complete genome",../fasta/GCF_000008545.1_ASM854v1_genomic.fna.gz,CC0
+GCF_000009085.1_ASM908v1_genomic.fna.gz.sig,752280e9969ce750e2c80477c1b7b0e7,11,DNA,0,10000,61,42,0,"NC_002163.1 Campylobacter jejuni subsp. jejuni NCTC 11168 = ATCC 700819 chromosome, complete genome",../fasta/GCF_000009085.1_ASM908v1_genomic.fna.gz,CC0
+GCF_000009085.1_ASM908v1_genomic.fna.gz.sig,eba0eb3ce984cc53c36f134a752c52c5,21,DNA,0,10000,157,42,0,"NC_002163.1 Campylobacter jejuni subsp. jejuni NCTC 11168 = ATCC 700819 chromosome, complete genome",../fasta/GCF_000009085.1_ASM908v1_genomic.fna.gz,CC0
+GCF_000009085.1_ASM908v1_genomic.fna.gz.sig,953156e9f4da8cf22e7e0b4b88261fae,31,DNA,0,10000,167,42,0,"NC_002163.1 Campylobacter jejuni subsp. jejuni NCTC 11168 = ATCC 700819 chromosome, complete genome",../fasta/GCF_000009085.1_ASM908v1_genomic.fna.gz,CC0
+GCF_000009505.1_ASM950v1_genomic.fna.gz.sig,0f35aeadda1532ed450bd6de1e73545d,11,DNA,0,10000,148,42,0,NC_011294.1 Salmonella enterica subsp. enterica serovar Enteritidis str. P125109 complete genome,../fasta/GCF_000009505.1_ASM950v1_genomic.fna.gz,CC0
+GCF_000009505.1_ASM950v1_genomic.fna.gz.sig,405ae3300f28ca5fe5c223cbf7e28734,21,DNA,0,10000,471,42,0,NC_011294.1 Salmonella enterica subsp. enterica serovar Enteritidis str. P125109 complete genome,../fasta/GCF_000009505.1_ASM950v1_genomic.fna.gz,CC0
+GCF_000009505.1_ASM950v1_genomic.fna.gz.sig,0842f7edb426fc4fa2701c107e678279,31,DNA,0,10000,461,42,0,NC_011294.1 Salmonella enterica subsp. enterica serovar Enteritidis str. P125109 complete genome,../fasta/GCF_000009505.1_ASM950v1_genomic.fna.gz,CC0
+GCF_000009525.1_ASM952v1_genomic.fna.gz.sig,d883538a0c983a863fa4b6e5fcd19612,11,DNA,0,10000,148,42,0,NC_011274.1 Salmonella enterica subsp. enterica serovar Gallinarum str. 287/91 complete genome,../fasta/GCF_000009525.1_ASM952v1_genomic.fna.gz,CC0
+GCF_000009525.1_ASM952v1_genomic.fna.gz.sig,9133bd71b86628b38c665ab7e5eb8712,21,DNA,0,10000,457,42,0,NC_011274.1 Salmonella enterica subsp. enterica serovar Gallinarum str. 287/91 complete genome,../fasta/GCF_000009525.1_ASM952v1_genomic.fna.gz,CC0
+GCF_000009525.1_ASM952v1_genomic.fna.gz.sig,afadabf39aec247929e84a29fd797117,31,DNA,0,10000,461,42,0,NC_011274.1 Salmonella enterica subsp. enterica serovar Gallinarum str. 287/91 complete genome,../fasta/GCF_000009525.1_ASM952v1_genomic.fna.gz,CC0
+GCF_000011885.1_ASM1188v1_genomic.fna.gz.sig,feef9e4d39fecd3d9292b76c0cc72b81,11,DNA,0,10000,155,42,0,"NC_006511.1 Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC 9150, complete genome",../fasta/GCF_000011885.1_ASM1188v1_genomic.fna.gz,CC0
+GCF_000011885.1_ASM1188v1_genomic.fna.gz.sig,cc80cb247b195ca3dfa0756257d882b6,21,DNA,0,10000,427,42,0,"NC_006511.1 Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC 9150, complete genome",../fasta/GCF_000011885.1_ASM1188v1_genomic.fna.gz,CC0
+GCF_000011885.1_ASM1188v1_genomic.fna.gz.sig,bb365606acbf08d183399f139af80c32,31,DNA,0,10000,459,42,0,"NC_006511.1 Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC 9150, complete genome",../fasta/GCF_000011885.1_ASM1188v1_genomic.fna.gz,CC0
+GCF_000016045.1_ASM1604v1_genomic.fna.gz.sig,4cec832176c4831239faed42c0616ef4,11,DNA,0,10000,155,42,0,"NC_011080.1 Salmonella enterica subsp. enterica serovar Newport str. SL254, complete genome",../fasta/GCF_000016045.1_ASM1604v1_genomic.fna.gz,CC0
+GCF_000016045.1_ASM1604v1_genomic.fna.gz.sig,43a9d80a4cd995779c7538a32088dd0e,21,DNA,0,10000,469,42,0,"NC_011080.1 Salmonella enterica subsp. enterica serovar Newport str. SL254, complete genome",../fasta/GCF_000016045.1_ASM1604v1_genomic.fna.gz,CC0
+GCF_000016045.1_ASM1604v1_genomic.fna.gz.sig,d0cfbe22579f98fd5de2d41203589964,31,DNA,0,10000,480,42,0,"NC_011080.1 Salmonella enterica subsp. enterica serovar Newport str. SL254, complete genome",../fasta/GCF_000016045.1_ASM1604v1_genomic.fna.gz,CC0
+GCF_000016785.1_ASM1678v1_genomic.fna.gz.sig,328f7b0643bdb6c76135292b5afc8fa7,11,DNA,0,10000,82,42,0,"NC_009486.1 Thermotoga petrophila RKU-1, complete genome",../fasta/GCF_000016785.1_ASM1678v1_genomic.fna.gz,CC0
+GCF_000016785.1_ASM1678v1_genomic.fna.gz.sig,a77789e831fcd2436c3b9e4e22fb173e,21,DNA,0,10000,190,42,0,"NC_009486.1 Thermotoga petrophila RKU-1, complete genome",../fasta/GCF_000016785.1_ASM1678v1_genomic.fna.gz,CC0
+GCF_000016785.1_ASM1678v1_genomic.fna.gz.sig,50d8efd580ff2000cb38d1f8cc9cf9b4,31,DNA,0,10000,185,42,0,"NC_009486.1 Thermotoga petrophila RKU-1, complete genome",../fasta/GCF_000016785.1_ASM1678v1_genomic.fna.gz,CC0
+GCF_000018945.1_ASM1894v1_genomic.fna.gz.sig,989f88420b193ef39c4dbe3b268e0049,11,DNA,0,10000,90,42,0,"NC_011978.1 Thermotoga neapolitana DSM 4359, complete genome",../fasta/GCF_000018945.1_ASM1894v1_genomic.fna.gz,CC0
+GCF_000018945.1_ASM1894v1_genomic.fna.gz.sig,bebcd0dcc0ed3b120ad16c4e15805370,21,DNA,0,10000,188,42,0,"NC_011978.1 Thermotoga neapolitana DSM 4359, complete genome",../fasta/GCF_000018945.1_ASM1894v1_genomic.fna.gz,CC0
+GCF_000018945.1_ASM1894v1_genomic.fna.gz.sig,4289d4241be8573145282352215ca3c4,31,DNA,0,10000,198,42,0,"NC_011978.1 Thermotoga neapolitana DSM 4359, complete genome",../fasta/GCF_000018945.1_ASM1894v1_genomic.fna.gz,CC0
+GCF_000195995.1_ASM19599v1_genomic.fna.gz.sig,40df36a7eb411022be4b1d6a7af05496,11,DNA,0,10000,161,42,0,"NC_003198.1 Salmonella enterica subsp. enterica serovar Typhi str. CT18, complete genome",../fasta/GCF_000195995.1_ASM19599v1_genomic.fna.gz,CC0
+GCF_000195995.1_ASM19599v1_genomic.fna.gz.sig,ffa92983f7e67454c407499cbfbabf88,21,DNA,0,10000,487,42,0,"NC_003198.1 Salmonella enterica subsp. enterica serovar Typhi str. CT18, complete genome",../fasta/GCF_000195995.1_ASM19599v1_genomic.fna.gz,CC0
+GCF_000195995.1_ASM19599v1_genomic.fna.gz.sig,cb26db5716a213c9a6614021e7176c1d,31,DNA,0,10000,512,42,0,"NC_003198.1 Salmonella enterica subsp. enterica serovar Typhi str. CT18, complete genome",../fasta/GCF_000195995.1_ASM19599v1_genomic.fna.gz,CC0
diff --git a/tests/test-data/gather/campy-picklist.csv b/tests/test-data/gather/campy-picklist.csv
new file mode 100644
index 0000000000..5490c2de61
--- /dev/null
+++ b/tests/test-data/gather/campy-picklist.csv
@@ -0,0 +1,4 @@
+signature_file,md5,ksize,moltype,num,scaled,n_hashes,seed,with_abundance,name,filename,license
+GCF_000009085.1_ASM908v1_genomic.fna.gz.sig,752280e9969ce750e2c80477c1b7b0e7,11,DNA,0,10000,61,42,0,"NC_002163.1 Campylobacter jejuni subsp. jejuni NCTC 11168 = ATCC 700819 chromosome, complete genome",../fasta/GCF_000009085.1_ASM908v1_genomic.fna.gz,CC0
+GCF_000009085.1_ASM908v1_genomic.fna.gz.sig,eba0eb3ce984cc53c36f134a752c52c5,21,DNA,0,10000,157,42,0,"NC_002163.1 Campylobacter jejuni subsp. jejuni NCTC 11168 = ATCC 700819 chromosome, complete genome",../fasta/GCF_000009085.1_ASM908v1_genomic.fna.gz,CC0
+GCF_000009085.1_ASM908v1_genomic.fna.gz.sig,953156e9f4da8cf22e7e0b4b88261fae,31,DNA,0,10000,167,42,0,"NC_002163.1 Campylobacter jejuni subsp. jejuni NCTC 11168 = ATCC 700819 chromosome, complete genome",../fasta/GCF_000009085.1_ASM908v1_genomic.fna.gz,CC0
diff --git a/tests/test-data/gather/salmonella-picklist.csv b/tests/test-data/gather/salmonella-picklist.csv
new file mode 100644
index 0000000000..ae048b99d4
--- /dev/null
+++ b/tests/test-data/gather/salmonella-picklist.csv
@@ -0,0 +1,25 @@
+signature_file,md5,ksize,moltype,num,scaled,n_hashes,seed,with_abundance,name,filename,license
+GCF_000006945.2_ASM694v2_genomic.fna.gz.sig,323c1a1712b0949268dd6fb93be63ae2,11,DNA,0,10000,150,42,0,"NC_003197.2 Salmonella enterica subsp. enterica serovar Typhimurium str. LT2, complete genome",../fasta/GCF_000006945.2_ASM694v2_genomic.fna.gz,CC0
+GCF_000006945.2_ASM694v2_genomic.fna.gz.sig,263c2de20b597d6e33b81ec91d8672b5,21,DNA,0,10000,485,42,0,"NC_003197.2 Salmonella enterica subsp. enterica serovar Typhimurium str. LT2, complete genome",../fasta/GCF_000006945.2_ASM694v2_genomic.fna.gz,CC0
+GCF_000006945.2_ASM694v2_genomic.fna.gz.sig,dc12a6d8fd63122aa68f78facf9bed94,31,DNA,0,10000,490,42,0,"NC_003197.2 Salmonella enterica subsp. enterica serovar Typhimurium str. LT2, complete genome",../fasta/GCF_000006945.2_ASM694v2_genomic.fna.gz,CC0
+GCF_000007545.1_ASM754v1_genomic.fna.gz.sig,df24140b1c6cad16b30abeaf03019eb5,11,DNA,0,10000,158,42,0,"NC_004631.1 Salmonella enterica subsp. enterica serovar Typhi Ty2, complete genome",../fasta/GCF_000007545.1_ASM754v1_genomic.fna.gz,CC0
+GCF_000007545.1_ASM754v1_genomic.fna.gz.sig,fd958e3b5649bc03890517ff239970ea,21,DNA,0,10000,445,42,0,"NC_004631.1 Salmonella enterica subsp. enterica serovar Typhi Ty2, complete genome",../fasta/GCF_000007545.1_ASM754v1_genomic.fna.gz,CC0
+GCF_000007545.1_ASM754v1_genomic.fna.gz.sig,8c22dff88a2239607762da00f7fd1725,31,DNA,0,10000,472,42,0,"NC_004631.1 Salmonella enterica subsp. enterica serovar Typhi Ty2, complete genome",../fasta/GCF_000007545.1_ASM754v1_genomic.fna.gz,CC0
+GCF_000008105.1_ASM810v1_genomic.fna.gz.sig,9db6efc92a041e11713ccfa8597edae5,11,DNA,0,10000,150,42,0,"NC_006905.1 Salmonella enterica subsp. enterica serovar Choleraesuis str. SC-B67, complete genome",../fasta/GCF_000008105.1_ASM810v1_genomic.fna.gz,CC0
+GCF_000008105.1_ASM810v1_genomic.fna.gz.sig,8996699a05d3e5a05fa3fe94bfa41431,21,DNA,0,10000,472,42,0,"NC_006905.1 Salmonella enterica subsp. enterica serovar Choleraesuis str. SC-B67, complete genome",../fasta/GCF_000008105.1_ASM810v1_genomic.fna.gz,CC0
+GCF_000008105.1_ASM810v1_genomic.fna.gz.sig,85c3aeec6457c0b1d210472ddeb67714,31,DNA,0,10000,468,42,0,"NC_006905.1 Salmonella enterica subsp. enterica serovar Choleraesuis str. SC-B67, complete genome",../fasta/GCF_000008105.1_ASM810v1_genomic.fna.gz,CC0
+GCF_000009505.1_ASM950v1_genomic.fna.gz.sig,0f35aeadda1532ed450bd6de1e73545d,11,DNA,0,10000,148,42,0,NC_011294.1 Salmonella enterica subsp. enterica serovar Enteritidis str. P125109 complete genome,../fasta/GCF_000009505.1_ASM950v1_genomic.fna.gz,CC0
+GCF_000009505.1_ASM950v1_genomic.fna.gz.sig,405ae3300f28ca5fe5c223cbf7e28734,21,DNA,0,10000,471,42,0,NC_011294.1 Salmonella enterica subsp. enterica serovar Enteritidis str. P125109 complete genome,../fasta/GCF_000009505.1_ASM950v1_genomic.fna.gz,CC0
+GCF_000009505.1_ASM950v1_genomic.fna.gz.sig,0842f7edb426fc4fa2701c107e678279,31,DNA,0,10000,461,42,0,NC_011294.1 Salmonella enterica subsp. enterica serovar Enteritidis str. P125109 complete genome,../fasta/GCF_000009505.1_ASM950v1_genomic.fna.gz,CC0
+GCF_000009525.1_ASM952v1_genomic.fna.gz.sig,d883538a0c983a863fa4b6e5fcd19612,11,DNA,0,10000,148,42,0,NC_011274.1 Salmonella enterica subsp. enterica serovar Gallinarum str. 287/91 complete genome,../fasta/GCF_000009525.1_ASM952v1_genomic.fna.gz,CC0
+GCF_000009525.1_ASM952v1_genomic.fna.gz.sig,9133bd71b86628b38c665ab7e5eb8712,21,DNA,0,10000,457,42,0,NC_011274.1 Salmonella enterica subsp. enterica serovar Gallinarum str. 287/91 complete genome,../fasta/GCF_000009525.1_ASM952v1_genomic.fna.gz,CC0
+GCF_000009525.1_ASM952v1_genomic.fna.gz.sig,afadabf39aec247929e84a29fd797117,31,DNA,0,10000,461,42,0,NC_011274.1 Salmonella enterica subsp. enterica serovar Gallinarum str. 287/91 complete genome,../fasta/GCF_000009525.1_ASM952v1_genomic.fna.gz,CC0
+GCF_000011885.1_ASM1188v1_genomic.fna.gz.sig,feef9e4d39fecd3d9292b76c0cc72b81,11,DNA,0,10000,155,42,0,"NC_006511.1 Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC 9150, complete genome",../fasta/GCF_000011885.1_ASM1188v1_genomic.fna.gz,CC0
+GCF_000011885.1_ASM1188v1_genomic.fna.gz.sig,cc80cb247b195ca3dfa0756257d882b6,21,DNA,0,10000,427,42,0,"NC_006511.1 Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC 9150, complete genome",../fasta/GCF_000011885.1_ASM1188v1_genomic.fna.gz,CC0
+GCF_000011885.1_ASM1188v1_genomic.fna.gz.sig,bb365606acbf08d183399f139af80c32,31,DNA,0,10000,459,42,0,"NC_006511.1 Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC 9150, complete genome",../fasta/GCF_000011885.1_ASM1188v1_genomic.fna.gz,CC0
+GCF_000016045.1_ASM1604v1_genomic.fna.gz.sig,4cec832176c4831239faed42c0616ef4,11,DNA,0,10000,155,42,0,"NC_011080.1 Salmonella enterica subsp. enterica serovar Newport str. SL254, complete genome",../fasta/GCF_000016045.1_ASM1604v1_genomic.fna.gz,CC0
+GCF_000016045.1_ASM1604v1_genomic.fna.gz.sig,43a9d80a4cd995779c7538a32088dd0e,21,DNA,0,10000,469,42,0,"NC_011080.1 Salmonella enterica subsp. enterica serovar Newport str. SL254, complete genome",../fasta/GCF_000016045.1_ASM1604v1_genomic.fna.gz,CC0
+GCF_000016045.1_ASM1604v1_genomic.fna.gz.sig,d0cfbe22579f98fd5de2d41203589964,31,DNA,0,10000,480,42,0,"NC_011080.1 Salmonella enterica subsp. enterica serovar Newport str. SL254, complete genome",../fasta/GCF_000016045.1_ASM1604v1_genomic.fna.gz,CC0
+GCF_000195995.1_ASM19599v1_genomic.fna.gz.sig,40df36a7eb411022be4b1d6a7af05496,11,DNA,0,10000,161,42,0,"NC_003198.1 Salmonella enterica subsp. enterica serovar Typhi str. CT18, complete genome",../fasta/GCF_000195995.1_ASM19599v1_genomic.fna.gz,CC0
+GCF_000195995.1_ASM19599v1_genomic.fna.gz.sig,ffa92983f7e67454c407499cbfbabf88,21,DNA,0,10000,487,42,0,"NC_003198.1 Salmonella enterica subsp. enterica serovar Typhi str. CT18, complete genome",../fasta/GCF_000195995.1_ASM19599v1_genomic.fna.gz,CC0
+GCF_000195995.1_ASM19599v1_genomic.fna.gz.sig,cb26db5716a213c9a6614021e7176c1d,31,DNA,0,10000,512,42,0,"NC_003198.1 Salmonella enterica subsp. enterica serovar Typhi str. CT18, complete genome",../fasta/GCF_000195995.1_ASM19599v1_genomic.fna.gz,CC0
diff --git a/tests/test-data/gather/thermotoga-picklist.csv b/tests/test-data/gather/thermotoga-picklist.csv
new file mode 100644
index 0000000000..4606e0ca47
--- /dev/null
+++ b/tests/test-data/gather/thermotoga-picklist.csv
@@ -0,0 +1,10 @@
+signature_file,md5,ksize,moltype,num,scaled,n_hashes,seed,with_abundance,name,filename,license
+GCF_000008545.1_ASM854v1_genomic.fna.gz.sig,74b928d3db1f7f033c0dcca6c6e52aea,11,DNA,0,10000,84,42,0,"NC_000853.1 Thermotoga maritima MSB8 chromosome, complete genome",../fasta/GCF_000008545.1_ASM854v1_genomic.fna.gz,CC0
+GCF_000008545.1_ASM854v1_genomic.fna.gz.sig,ba9947e078cab29e20bc7d31bc1b9f0d,21,DNA,0,10000,192,42,0,"NC_000853.1 Thermotoga maritima MSB8 chromosome, complete genome",../fasta/GCF_000008545.1_ASM854v1_genomic.fna.gz,CC0
+GCF_000008545.1_ASM854v1_genomic.fna.gz.sig,1bfe96d76ec9cdb60779a1a9223c424e,31,DNA,0,10000,187,42,0,"NC_000853.1 Thermotoga maritima MSB8 chromosome, complete genome",../fasta/GCF_000008545.1_ASM854v1_genomic.fna.gz,CC0
+GCF_000016785.1_ASM1678v1_genomic.fna.gz.sig,328f7b0643bdb6c76135292b5afc8fa7,11,DNA,0,10000,82,42,0,"NC_009486.1 Thermotoga petrophila RKU-1, complete genome",../fasta/GCF_000016785.1_ASM1678v1_genomic.fna.gz,CC0
+GCF_000016785.1_ASM1678v1_genomic.fna.gz.sig,a77789e831fcd2436c3b9e4e22fb173e,21,DNA,0,10000,190,42,0,"NC_009486.1 Thermotoga petrophila RKU-1, complete genome",../fasta/GCF_000016785.1_ASM1678v1_genomic.fna.gz,CC0
+GCF_000016785.1_ASM1678v1_genomic.fna.gz.sig,50d8efd580ff2000cb38d1f8cc9cf9b4,31,DNA,0,10000,185,42,0,"NC_009486.1 Thermotoga petrophila RKU-1, complete genome",../fasta/GCF_000016785.1_ASM1678v1_genomic.fna.gz,CC0
+GCF_000018945.1_ASM1894v1_genomic.fna.gz.sig,989f88420b193ef39c4dbe3b268e0049,11,DNA,0,10000,90,42,0,"NC_011978.1 Thermotoga neapolitana DSM 4359, complete genome",../fasta/GCF_000018945.1_ASM1894v1_genomic.fna.gz,CC0
+GCF_000018945.1_ASM1894v1_genomic.fna.gz.sig,bebcd0dcc0ed3b120ad16c4e15805370,21,DNA,0,10000,188,42,0,"NC_011978.1 Thermotoga neapolitana DSM 4359, complete genome",../fasta/GCF_000018945.1_ASM1894v1_genomic.fna.gz,CC0
+GCF_000018945.1_ASM1894v1_genomic.fna.gz.sig,4289d4241be8573145282352215ca3c4,31,DNA,0,10000,198,42,0,"NC_011978.1 Thermotoga neapolitana DSM 4359, complete genome",../fasta/GCF_000018945.1_ASM1894v1_genomic.fna.gz,CC0
diff --git a/tests/test_cmd_signature.py b/tests/test_cmd_signature.py
index 9de8e89267..af611a4ccd 100644
--- a/tests/test_cmd_signature.py
+++ b/tests/test_cmd_signature.py
@@ -1144,7 +1144,7 @@ def test_sig_extract_8_picklist_md5(runtmp):
 
     print(err)
     assert "loaded 1 distinct values into picklist." in err
-    assert "loaded 2 total that matched ksize & molecule type" in err
+    assert "loaded 1 total that matched ksize & molecule type" in err
     assert "extracted 1 signatures from 2 file(s)" in err
     assert "for given picklist, found 1 matches to 1 distinct values" in err
 
@@ -1189,7 +1189,7 @@ def test_sig_extract_8_picklist_md5_require_all(runtmp):
 
     print(err)
     assert "loaded 2 distinct values into picklist." in err
-    assert "loaded 2 total that matched ksize & molecule type" in err
+    assert "loaded 1 total that matched ksize & molecule type" in err
     assert "extracted 1 signatures from 2 file(s)" in err
     assert "for given picklist, found 1 matches to 2 distinct values" in err
     assert 'WARNING: 1 missing picklist values.' in err
diff --git a/tests/test_index.py b/tests/test_index.py
index 5e30c7c585..2da959410a 100644
--- a/tests/test_index.py
+++ b/tests/test_index.py
@@ -17,6 +17,7 @@
 from sourmash.sbtmh import SigLeaf
 from sourmash import sourmash_args
 from sourmash.search import JaccardSearch, SearchType
+from sourmash.picklist import SignaturePicklist
 
 import sourmash_tst_utils as utils
 
@@ -634,6 +635,29 @@ def test_linear_index_moltype_select():
     assert len(linear2) == 0
 
 
+def test_linear_index_picklist_select():
+    # test select with a picklist
+
+    # this loads three ksizes, 21/31/51
+    sig2 = utils.get_test_data('2.fa.sig')
+    siglist = sourmash.load_file_as_signatures(sig2)
+
+    linear = LinearIndex()
+    for ss in siglist:
+        linear.insert(ss)
+
+    # construct a picklist...
+    picklist = SignaturePicklist('md5prefix8')
+    picklist.init(['f3a90d4e'])
+
+    # select on picklist
+    linear2 = linear.select(picklist=picklist)
+    assert len(linear2) == 1
+    ss = list(linear2.signatures())[0]
+    assert ss.minhash.ksize == 31
+    assert ss.md5sum().startswith('f3a90d4e55')
+
+
 @utils.in_tempdir
 def test_index_same_md5sum_fsstorage(c):
     testdata1 = utils.get_test_data('img/2706795855.sig')
diff --git a/tests/test_lca.py b/tests/test_lca.py
index a6443dbc6c..eb66562a3e 100644
--- a/tests/test_lca.py
+++ b/tests/test_lca.py
@@ -11,8 +11,10 @@
 import sourmash
 from sourmash import load_one_signature, SourmashSignature
 
+from sourmash.search import make_jaccard_search_query
 from sourmash.lca import lca_utils
 from sourmash.lca.lca_utils import LineagePair
+from sourmash.picklist import SignaturePicklist
 
 
 def test_api_create_search():
@@ -31,6 +33,40 @@ def test_api_create_search():
     assert match.minhash == ss.minhash
 
 
+def test_api_find_picklist_select():
+    # does 'find' respect picklists?
+
+    sig47 = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'),
+                                        ksize=31)
+    sig63 = sourmash.load_one_signature(utils.get_test_data('63.fa.sig'),
+                                        ksize=31)
+
+    lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000)
+    lca_db.insert(sig47)
+    lca_db.insert(sig63)
+
+    # construct a picklist...
+    picklist = SignaturePicklist('md5prefix8')
+    picklist.init(['09a08691'])
+
+    # run a 'find' with sig63, should find 47 and 63 both.
+    search_obj = make_jaccard_search_query(do_containment=True, threshold=0.0)
+    results = list(lca_db.find(search_obj, sig63))
+    print(results)
+    assert len(results) == 2
+
+    # now, select on picklist and do another find...
+    lca_db = lca_db.select(picklist=picklist)
+    results = list(lca_db.find(search_obj, sig63))
+    print(results)
+    assert len(results) == 1
+
+    # and check that it is the expected one!
+    ss = results[0].signature
+    assert ss.minhash.ksize == 31
+    assert ss.md5sum().startswith('09a08691c')
+
+
 def test_api_create_insert():
     # test some internal implementation stuff: create & then insert a sig.
     ss = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'),
@@ -460,6 +496,46 @@ def test_lca_index_select():
         db.select(moltype='protein')
 
 
+def test_lca_index_select_picklist():
+    # test 'select' method from Index base class with a picklist.
+
+    filename = utils.get_test_data('lca/47+63.lca.json')
+    db, ksize, scaled = lca_utils.load_single_database(filename)
+
+    # construct a picklist...
+    picklist = SignaturePicklist('md5prefix8')
+    picklist.init(['50a92740'])
+
+    xx = db.select(picklist=picklist)
+    assert xx == db
+
+    siglist = list(db.signatures())
+    assert len(siglist) == 1
+    ss = siglist[0]
+    assert ss.md5sum().startswith('50a92740')
+    assert ss.minhash.ksize == 31
+
+
+def test_lca_index_select_picklist_twice():
+    # test 'select' method from Index base class with a picklist.
+
+    filename = utils.get_test_data('lca/47+63.lca.json')
+    db, ksize, scaled = lca_utils.load_single_database(filename)
+
+    # construct a picklist...
+    picklist = SignaturePicklist('md5prefix8')
+    picklist.init(['50a92740'])
+
+    xx = db.select(picklist=picklist)
+    assert xx == db
+
+    with pytest.raises(ValueError) as exc:
+        xx = db.select(picklist=picklist)
+
+    assert "we do not (yet) support multiple picklists for LCA databases" in str(exc)
+
+
+
 def test_search_db_scaled_gt_sig_scaled():
     dbfile = utils.get_test_data('lca/47+63.lca.json')
     db, ksize, scaled = lca_utils.load_single_database(dbfile)
@@ -2438,3 +2514,31 @@ def test_lca_db_dayhoff_command_search(c):
     c.run_sourmash('gather', sigfile1, db_out, '--threshold', '0.0')
     assert 'found 1 matches total' in c.last_result.out
     assert 'the recovered matches hit 100.0% of the query' in c.last_result.out
+
+
+def test_lca_index_with_picklist(runtmp):
+    gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig'))
+    outdb = runtmp.output('gcf.lca.json')
+    picklist = utils.get_test_data('gather/thermotoga-picklist.csv')
+
+    # create an empty spreadsheet
+    with open(runtmp.output('empty.csv'), 'wt') as fp:
+        fp.write('accession,superkingdom,phylum,class,order,family,genus,species,strain')
+
+    runtmp.sourmash('lca', 'index', 'empty.csv', outdb, *gcf_sigs,
+                    '-k', '21', '--picklist', f"{picklist}:md5:md5")
+
+    out = runtmp.last_result.out
+    err = runtmp.last_result.err
+
+    print(out)
+    print(err)
+
+    assert "for given picklist, found 3 matches to 9 distinct values" in err
+    assert "WARNING: 6 missing picklist values."
+    assert "WARNING: no lineage provided for 3 signatures" in err
+
+    siglist = list(sourmash.load_file_as_signatures(outdb))
+    assert len(siglist) == 3
+    for ss in siglist:
+        assert 'Thermotoga' in ss.name
diff --git a/tests/test_prefetch.py b/tests/test_prefetch.py
index da37559d2b..72556f5135 100644
--- a/tests/test_prefetch.py
+++ b/tests/test_prefetch.py
@@ -4,6 +4,7 @@
 import os
 import csv
 import pytest
+import glob
 
 import sourmash_tst_utils as utils
 import sourmash
@@ -444,3 +445,26 @@ def test_prefetch_basic_many_sigs(runtmp, linear_gather):
     assert "total of 10 matching signatures." in c.last_result.err
     assert "of 5177 distinct query hashes, 5177 were found in matches above threshold." in c.last_result.err
     assert "a total of 0 query hashes remain unmatched." in c.last_result.err
+
+
+def test_prefetch_with_picklist(runtmp):
+    # test 'sourmash prefetch' with picklists
+    gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig'))
+    metag_sig = utils.get_test_data('gather/combined.sig')
+    picklist = utils.get_test_data('gather/thermotoga-picklist.csv')
+
+    runtmp.sourmash('prefetch', metag_sig, *gcf_sigs,
+                    '-k', '21', '--picklist', f"{picklist}:md5:md5")
+
+    err = runtmp.last_result.err
+    print(err)
+    assert "for given picklist, found 3 matches to 9 distinct values" in err
+    # these are the different ksizes
+    assert "WARNING: 6 missing picklist values." in err
+
+    out = runtmp.last_result.out
+    print(out)
+
+    assert "total of 3 matching signatures." in err
+    assert "of 1466 distinct query hashes, 453 were found in matches above threshold." in err
+    assert "a total of 1013 query hashes remain unmatched." in err
diff --git a/tests/test_sbt.py b/tests/test_sbt.py
index 1678dbf177..cf853d1b6c 100644
--- a/tests/test_sbt.py
+++ b/tests/test_sbt.py
@@ -14,6 +14,7 @@
 from sourmash.sbt_storage import (FSStorage, RedisStorage,
                                   IPFSStorage, ZipStorage)
 from sourmash.search import make_jaccard_search_query
+from sourmash.picklist import SignaturePicklist
 
 import sourmash_tst_utils as utils
 
@@ -231,10 +232,12 @@ def test_search_minhashes():
     # this fails if 'search_obj' is calc containment and not similarity.
     search_obj = make_jaccard_search_query(threshold=0.08)
     results = tree.find(search_obj, to_search.data)
-    for sr in results:
+
+    n = 0
+    for n, sr in enumerate(results):
         assert to_search.data.jaccard(sr.signature) >= 0.08
 
-    print(results)
+    assert n == 1
 
 
 def test_binary_nary_tree():
@@ -636,6 +639,96 @@ def test_sbt_as_index_select():
         tree.select(moltype='protein')
 
 
+def test_sbt_as_index_select_picklist():
+    # test 'select' method from Index base class with a picklist
+
+    factory = GraphFactory(31, 1e5, 4)
+    tree = SBT(factory, d=2)
+
+    sig47 = load_one_signature(utils.get_test_data('47.fa.sig'))
+    sig63 = load_one_signature(utils.get_test_data('63.fa.sig'))
+
+    tree.insert(sig47)
+    tree.insert(sig63)
+
+    # construct a picklist...
+    picklist = SignaturePicklist('md5prefix8')
+    picklist.init(['09a08691'])
+
+    # select on picklist
+    tree = tree.select(picklist=picklist)
+    siglist = list(tree.signatures())
+    assert len(siglist) == 1
+
+    ss = siglist[0]
+    assert ss.minhash.ksize == 31
+    assert ss.md5sum().startswith('09a08691c')
+
+
+def test_sbt_as_index_find_picklist():
+    # test 'select' method from Index base class with a picklist
+
+    factory = GraphFactory(31, 1e5, 4)
+    tree = SBT(factory, d=2)
+
+    sig47 = load_one_signature(utils.get_test_data('47.fa.sig'))
+    sig63 = load_one_signature(utils.get_test_data('63.fa.sig'))
+
+    tree.insert(sig47)
+    tree.insert(sig63)
+
+    # construct a picklist...
+    picklist = SignaturePicklist('md5prefix8')
+    picklist.init(['09a08691'])
+
+    # run a 'find' with sig63, should find 47 and 63 both.
+    search_obj = make_jaccard_search_query(do_containment=True, threshold=0.0)
+    results = list(tree.find(search_obj, sig63))
+    print(results)
+    assert len(results) == 2
+
+    # now, select on picklist and do another find...
+    tree = tree.select(picklist=picklist)
+    results = list(tree.find(search_obj, sig63))
+    print(results)
+    assert len(results) == 1
+
+    # and check that it is the expected one!
+    ss = results[0].signature
+    assert ss.minhash.ksize == 31
+    assert ss.md5sum().startswith('09a08691c')
+
+
+def test_sbt_as_index_find_picklist_twice():
+    # test 'select' method from Index base class with a picklist
+
+    factory = GraphFactory(31, 1e5, 4)
+    tree = SBT(factory, d=2)
+
+    sig47 = load_one_signature(utils.get_test_data('47.fa.sig'))
+    sig63 = load_one_signature(utils.get_test_data('63.fa.sig'))
+
+    tree.insert(sig47)
+    tree.insert(sig63)
+
+    # construct a picklist...
+    picklist = SignaturePicklist('md5prefix8')
+    picklist.init(['09a08691'])
+
+    # run a 'find' with sig63, should find 47 and 63 both.
+    search_obj = make_jaccard_search_query(do_containment=True, threshold=0.0)
+    results = list(tree.find(search_obj, sig63))
+    print(results)
+    assert len(results) == 2
+
+    # now, select twice on picklists...
+    tree = tree.select(picklist=picklist)
+
+    with pytest.raises(ValueError):
+        tree = tree.select(picklist=picklist)
+        assert "we do not (yet) support multiple picklists for SBT databases" in str(exc)
+
+
 def test_sbt_as_index_signatures():
     # test 'signatures' method from Index base class.
     factory = GraphFactory(31, 1e5, 4)
diff --git a/tests/test_sourmash.py b/tests/test_sourmash.py
index 6474df077c..c6abad69e4 100644
--- a/tests/test_sourmash.py
+++ b/tests/test_sourmash.py
@@ -2023,6 +2023,29 @@ def test_search_metagenome_downsample_index(c):
     assert '12 matches; showing first 3:' in str(c)
 
 
+def test_search_with_picklist(runtmp):
+    # test 'sourmash search' with picklists
+    gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig'))
+    metag_sig = utils.get_test_data('gather/combined.sig')
+    picklist = utils.get_test_data('gather/thermotoga-picklist.csv')
+
+    runtmp.sourmash('search', metag_sig, *gcf_sigs, '--containment',
+                    '-k', '21', '--picklist', f"{picklist}:md5:md5")
+
+    err = runtmp.last_result.err
+    print(err)
+    assert "for given picklist, found 3 matches to 9 distinct values" in err
+    # these are the different ksizes
+    assert "WARNING: 6 missing picklist values." in err
+
+    out = runtmp.last_result.out
+    print(out)
+    assert "3 matches:" in out
+    assert "13.1%       NC_000853.1 Thermotoga" in out
+    assert "13.0%       NC_009486.1 Thermotoga" in out
+    assert "12.8%       NC_011978.1 Thermotoga" in out
+
+
 def test_mash_csv_to_sig():
     with utils.TempDirectory() as location:
         testdata1 = utils.get_test_data('short.fa.msh.dump')
@@ -2886,6 +2909,27 @@ def test_compare_with_abundance_3():
         assert '70.5%' in out
 
 
+def test_compare_with_picklist(runtmp):
+    # test 'sourmash compare' with picklists
+    gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig'))
+    picklist = utils.get_test_data('gather/thermotoga-picklist.csv')
+
+    runtmp.sourmash('compare', *gcf_sigs,
+                    '-k', '21', '--picklist', f"{picklist}:md5:md5")
+
+    err = runtmp.last_result.err
+    out = runtmp.last_result.out
+    print(runtmp.last_result.out)
+    print(runtmp.last_result.err)
+
+    assert "for given picklist, found 3 matches to 9 distinct values" in err
+    assert "WARNING: 6 missing picklist values." in err
+
+    assert "NC_009486.1 The..." in out
+    assert "NC_000853.1 The..." in out
+    assert "NC_011978.1 The..." in out
+
+
 def test_gather(linear_gather, prefetch_gather):
     with utils.TempDirectory() as location:
         testdata1 = utils.get_test_data('short.fa')
@@ -3965,6 +4009,30 @@ def test_gather_query_downsample_explicit(linear_gather, prefetch_gather):
                     'NC_003197.2' in out))
 
 
+def test_gather_with_picklist(runtmp, linear_gather, prefetch_gather):
+    # test 'sourmash gather' with picklists
+    gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig'))
+    metag_sig = utils.get_test_data('gather/combined.sig')
+    picklist = utils.get_test_data('gather/thermotoga-picklist.csv')
+
+    runtmp.sourmash('gather', metag_sig, *gcf_sigs, '--threshold-bp=0',
+                    '-k', '21', '--picklist', f"{picklist}:md5:md5",
+                    linear_gather, prefetch_gather)
+
+    err = runtmp.last_result.err
+    print(err)
+    assert "for given picklist, found 3 matches to 9 distinct values" in err
+    # these are the different ksizes
+    assert "WARNING: 6 missing picklist values." in err
+
+    out = runtmp.last_result.out
+    print(out)
+    assert "found 3 matches total;" in out
+    assert "1.9 Mbp       13.1%  100.0%    NC_000853.1 Thermotoga" in out
+    assert "1.9 Mbp       11.5%   89.9%    NC_011978.1 Thermotoga" in out
+    assert "1.9 Mbp        6.3%   48.4%    NC_009486.1 Thermotoga" in out
+
+
 def test_gather_save_matches(linear_gather, prefetch_gather):
     with utils.TempDirectory() as location:
         testdata_glob = utils.get_test_data('gather/GCF*.sig')
@@ -4805,3 +4873,104 @@ def test_do_sourmash_index_zipfile_append(c):
         sbts = [c for c in content if c.endswith(".sbt.json")]
         assert len(sbts) == 1
         assert sbts[0] == "zzz.sbt.json"
+
+
+def test_index_with_picklist(runtmp):
+    # test 'sourmash index' with picklists
+    gcf_sig_dir = utils.get_test_data('gather/')
+    picklist = utils.get_test_data('gather/thermotoga-picklist.csv')
+
+    output_db = runtmp.output('thermo.sbt.zip')
+
+    runtmp.sourmash('index', output_db, gcf_sig_dir,
+                    '-k', '31', '--picklist', f"{picklist}:md5:md5")
+
+    err = runtmp.last_result.err
+    print(err)
+    assert "for given picklist, found 3 matches to 9 distinct values" in err
+
+    # these are the different ksizes
+    assert "WARNING: 6 missing picklist values." in err
+
+    # verify:
+    siglist = list(sourmash.load_file_as_signatures(output_db))
+    assert len(siglist) == 3
+    for ss in siglist:
+        assert 'Thermotoga' in ss.name
+
+
+def test_index_matches_search_with_picklist(runtmp):
+    # test 'sourmash index' with picklists
+    gcf_sig_dir = utils.get_test_data('gather/')
+    gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig'))
+    picklist = utils.get_test_data('gather/thermotoga-picklist.csv')
+    metag_sig = utils.get_test_data('gather/combined.sig')
+
+    output_db = runtmp.output('thermo.sbt.zip')
+
+    runtmp.sourmash('index', output_db, gcf_sig_dir, '-k', '21')
+    print(runtmp.last_result.out)
+    print(runtmp.last_result.err)
+
+    # verify:
+    siglist = list(sourmash.load_file_as_signatures(output_db))
+    assert len(siglist) > 3     # all signatures included...
+
+    n_thermo = 0
+    for ss in siglist:
+        if 'Thermotoga' in ss.name:
+            n_thermo += 1
+
+    assert n_thermo == 3
+
+    runtmp.sourmash('search', metag_sig, output_db, '--containment',
+                    '-k', '21', '--picklist', f"{picklist}:md5:md5")
+
+    err = runtmp.last_result.err
+    print(err)
+    assert "for given picklist, found 3 matches to 9 distinct values" in err
+    # these are the different ksizes
+    assert "WARNING: 6 missing picklist values." in err
+
+    out = runtmp.last_result.out
+    print(out)
+    assert "3 matches:" in out
+    assert "13.1%       NC_000853.1 Thermotoga" in out
+    assert "13.0%       NC_009486.1 Thermotoga" in out
+    assert "12.8%       NC_011978.1 Thermotoga" in out
+
+
+def test_gather_with_prefetch_picklist(runtmp, linear_gather):
+    # test 'gather' using a picklist taken from 'sourmash prefetch' output
+    gcf_sigs = glob.glob(utils.get_test_data('gather/GCF*.sig'))
+    metag_sig = utils.get_test_data('gather/combined.sig')
+    prefetch_csv = runtmp.output('prefetch-out.csv')
+
+    runtmp.sourmash('prefetch', metag_sig, *gcf_sigs,
+                    '-k', '21', '-o', prefetch_csv)
+
+    err = runtmp.last_result.err
+    print(err)
+
+    out = runtmp.last_result.out
+    print(out)
+
+    assert "total of 12 matching signatures." in err
+    assert "of 1466 distinct query hashes, 1466 were found in matches above threshold." in err
+
+    # now, do a gather with the results
+    runtmp.sourmash('gather', metag_sig, *gcf_sigs, linear_gather,
+                    '-k', '21', '--picklist',
+                    f'{prefetch_csv}:match_md5:md5short')
+
+    err = runtmp.last_result.err
+    print(err)
+
+    out = runtmp.last_result.out
+    print(out)
+
+    assert "found 11 matches total;" in out
+    assert "the recovered matches hit 99.9% of the query" in out
+
+    assert "4.9 Mbp       33.2%  100.0%    NC_003198.1 " in out
+    assert "1.9 Mbp       13.1%  100.0%    NC_000853.1 " in out