Skip to content

Commit

Permalink
Allow repeating an option that takes multiple values
Browse files Browse the repository at this point in the history
Many command line arguments take multiple values. Previously, to utilize
this feature, all values had to be specified after a single option flag
(e.g. --exclude-where 'region=A' 'region=B'). If options were set using
separate option flags (e.g. --exclude-where 'region=A' --exclude-where
'region=B'), only the values from the last flag would be used, which is
unintuitive. This commit makes the values from all occurrences of an
option flag accumulate into a single list.

Done across the codebase by adding action='extend' to all options that
use nargs='+' and did not already specify an action.

A side effect of action='extend' is that it extends instead of replacing
a default value defined in add_argument. For arguments that use a custom
default value, use a custom argparse action ExtendOverwriteDefault,
based on the similarly named AppendOverwriteDefault from Nextstrain
CLI¹.

¹ <https://github.com/nextstrain/cli/blob/9bf646b0c795d04658a6f6807d74428b7c173995/nextstrain/cli/argparse.py#L183-L197>
  • Loading branch information
victorlin committed Apr 22, 2024
1 parent bac41fa commit 03b49da
Show file tree
Hide file tree
Showing 27 changed files with 103 additions and 70 deletions.
2 changes: 1 addition & 1 deletion augur/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def register_arguments(parser):
Kept as a separate function than `register_parser` to continue to support
unit tests that use this function to create argparser.
"""
parser.add_argument('--sequences', '-s', required=True, nargs="+", metavar="FASTA", help="sequences to align")
parser.add_argument('--sequences', '-s', required=True, nargs="+", action="extend", metavar="FASTA", help="sequences to align")
parser.add_argument('--output', '-o', default="alignment.fasta", help="output file (default: %(default)s)")
parser.add_argument('--nthreads', type=nthreads_value, default=1,
help="number of threads to use; specifying the value 'auto' will cause the number of available CPU cores on your system, if determinable, to be used")
Expand Down
2 changes: 1 addition & 1 deletion augur/ancestral.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ def register_parser(parent_subparsers):
)
amino_acid_options_group.add_argument('--annotation',
help='GenBank or GFF file containing the annotation')
amino_acid_options_group.add_argument('--genes', nargs='+', help="genes to translate (list or file containing list)")
amino_acid_options_group.add_argument('--genes', nargs='+', action='extend', help="genes to translate (list or file containing list)")
amino_acid_options_group.add_argument('--translations', type=str, help="translated alignments for each CDS/Gene. "
"Currently only supported for FASTA-input. Specify the file name via a "
"template like 'aa_sequences_%%GENE.fasta' where %%GENE will be replaced "
Expand Down
17 changes: 17 additions & 0 deletions augur/argparse_.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,20 @@ class HideAsFalseAction(Action):
"""
def __call__(self, parser, namespace, values, option_string=None):
setattr(namespace, self.dest, option_string[2:6] != 'hide')


class ExtendOverwriteDefault(Action):
    """
    Argparse action that accumulates values across repeated uses of an option,
    like the built-in ``extend`` action, except that the first use of the
    option discards the argument's ``default`` rather than adding to it.

    As a result, the ``default`` never appears alongside user-supplied values,
    and it does not have to be a list.
    """
    def __call__(self, parser, namespace, value, option_string = None):
        existing = getattr(namespace, self.dest, None)

        # Identity (not equality) is used on purpose: a user who explicitly
        # passes values equal to the default must still be treated as having
        # given the option, so later occurrences keep extending those values.
        if existing is None or existing is parser.get_default(self.dest):
            accumulated = [*value]
        else:
            accumulated = [*existing, *value]

        # Always store a fresh list so the default object is never mutated.
        setattr(namespace, self.dest, accumulated)
4 changes: 2 additions & 2 deletions augur/clades.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,8 +341,8 @@ def parse_nodes(tree_file, node_data_files):
def register_parser(parent_subparsers):
parser = parent_subparsers.add_parser("clades", help=__doc__)
parser.add_argument('--tree', required=True, help="prebuilt Newick -- no tree will be built if provided")
parser.add_argument('--mutations', required=True, metavar="NODE_DATA_JSON", nargs='+', help='JSON(s) containing ancestral and tip nucleotide and/or amino-acid mutations ')
parser.add_argument('--reference', nargs='+', help=SUPPRESS)
parser.add_argument('--mutations', required=True, metavar="NODE_DATA_JSON", nargs='+', action='extend', help='JSON(s) containing ancestral and tip nucleotide and/or amino-acid mutations ')
parser.add_argument('--reference', nargs='+', action='extend', help=SUPPRESS)
parser.add_argument('--clades', required=True, metavar="TSV", type=str, help='TSV file containing clade definitions by amino-acid')
parser.add_argument('--output-node-data', type=str, metavar="NODE_DATA_JSON", help='name of JSON file to save clade assignments to')
parser.add_argument('--membership-name', type=str, default="clade_membership", help='Key to store clade membership under; use "None" to not export this')
Expand Down
4 changes: 2 additions & 2 deletions augur/curate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from collections import deque
from textwrap import dedent

from augur.argparse_ import add_command_subparsers
from augur.argparse_ import ExtendOverwriteDefault, add_command_subparsers
from augur.errors import AugurError
from augur.io.json import dump_ndjson, load_ndjson
from augur.io.metadata import DEFAULT_DELIMITERS, InvalidDelimiter, read_table_to_dict, read_metadata_with_sequences, write_records_to_tsv
Expand Down Expand Up @@ -53,7 +53,7 @@ def create_shared_parser():
help="Name of the metadata column that contains the record identifier for reporting duplicate records. "
"Uses the first column of the metadata file if not provided. "
"Ignored if also providing a FASTA file input.")
shared_inputs.add_argument("--metadata-delimiters", default=DEFAULT_DELIMITERS, nargs="+",
shared_inputs.add_argument("--metadata-delimiters", default=DEFAULT_DELIMITERS, nargs="+", action=ExtendOverwriteDefault,
help="Delimiters to accept when reading a metadata file. Only one delimiter will be inferred.")

shared_inputs.add_argument("--fasta",
Expand Down
4 changes: 2 additions & 2 deletions augur/curate/format_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ def register_parser(parent_subparsers):
help=__doc__)

required = parser.add_argument_group(title="REQUIRED")
required.add_argument("--date-fields", nargs="+",
required.add_argument("--date-fields", nargs="+", action="extend",
help="List of date field names in the record that need to be standardized.")
required.add_argument("--expected-date-formats", nargs="+",
required.add_argument("--expected-date-formats", nargs="+", action="extend",
help="Expected date formats that are currently in the provided date fields, " +
"defined by standard format codes as listed at " +
"https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes. " +
Expand Down
10 changes: 5 additions & 5 deletions augur/distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -660,11 +660,11 @@ def get_distances_to_all_pairs(tree, sequences_by_node_and_gene, distance_map, e
def register_parser(parent_subparsers):
parser = parent_subparsers.add_parser("distance", help=first_line(__doc__))
parser.add_argument("--tree", help="Newick tree", required=True)
parser.add_argument("--alignment", nargs="+", help="sequence(s) to be used, supplied as FASTA files", required=True)
parser.add_argument('--gene-names', nargs="+", type=str, help="names of the sequences in the alignment, same order assumed", required=True)
parser.add_argument("--attribute-name", nargs="+", help="name to store distances associated with the given distance map; multiple attribute names are linked to corresponding positional comparison method and distance map arguments", required=True)
parser.add_argument("--compare-to", nargs="+", choices=["root", "ancestor", "pairwise"], help="type of comparison between samples in the given tree including comparison of all nodes to the root (root), all tips to their last ancestor from a previous season (ancestor), or all tips from the current season to all tips in previous seasons (pairwise)", required=True)
parser.add_argument("--map", nargs="+", help="JSON providing the distance map between sites and, optionally, sequences present at those sites; the distance map JSON minimally requires a 'default' field defining a default numeric distance and a 'map' field defining a dictionary of genes and one-based coordinates", required=True)
parser.add_argument("--alignment", nargs="+", action="extend", help="sequence(s) to be used, supplied as FASTA files", required=True)
parser.add_argument('--gene-names', nargs="+", action="extend", type=str, help="names of the sequences in the alignment, same order assumed", required=True)
parser.add_argument("--attribute-name", nargs="+", action="extend", help="name to store distances associated with the given distance map; multiple attribute names are linked to corresponding positional comparison method and distance map arguments", required=True)
parser.add_argument("--compare-to", nargs="+", action="extend", choices=["root", "ancestor", "pairwise"], help="type of comparison between samples in the given tree including comparison of all nodes to the root (root), all tips to their last ancestor from a previous season (ancestor), or all tips from the current season to all tips in previous seasons (pairwise)", required=True)
parser.add_argument("--map", nargs="+", action="extend", help="JSON providing the distance map between sites and, optionally, sequences present at those sites; the distance map JSON minimally requires a 'default' field defining a default numeric distance and a 'map' field defining a dictionary of genes and one-based coordinates", required=True)
parser.add_argument("--date-annotations", help="JSON of branch lengths and date annotations from augur refine for samples in the given tree; required for comparisons to earliest or latest date")
parser.add_argument("--earliest-date", help="earliest date at which samples are considered to be from previous seasons (e.g., 2019-01-01). This date is only used in pairwise comparisons. If omitted, all samples prior to the latest date will be considered.")
parser.add_argument("--latest-date", help="latest date at which samples are considered to be from previous seasons (e.g., 2019-01-01); samples from any date after this are considered part of the current season")
Expand Down
3 changes: 2 additions & 1 deletion augur/export_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from Bio import Phylo
from argparse import SUPPRESS
from collections import defaultdict
from .argparse_ import ExtendOverwriteDefault
from .errors import AugurError
from .io.metadata import DEFAULT_DELIMITERS, InvalidDelimiter, read_metadata
from .utils import read_node_data, write_json, read_config, read_lat_longs, read_colors
Expand Down Expand Up @@ -312,7 +313,7 @@ def add_core_args(parser):
core = parser.add_argument_group("REQUIRED")
core.add_argument('--tree','-t', required=True, help="tree to perform trait reconstruction on")
core.add_argument('--metadata', required=True, metavar="FILE", help="sequence metadata")
core.add_argument('--metadata-delimiters', default=DEFAULT_DELIMITERS, nargs="+",
core.add_argument('--metadata-delimiters', default=DEFAULT_DELIMITERS, nargs="+", action=ExtendOverwriteDefault,
help="delimiters to accept when reading a metadata file. Only one delimiter will be inferred.")
core.add_argument('--node-data', required=True, nargs='+', action="extend", help="JSON files with meta data for each node")
core.add_argument('--output-tree', help="JSON file name that is passed on to auspice (e.g., zika_tree.json).")
Expand Down
13 changes: 7 additions & 6 deletions augur/export_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import re
from Bio import Phylo

from .argparse_ import ExtendOverwriteDefault
from .errors import AugurError
from .io.file import open_file
from .io.metadata import DEFAULT_DELIMITERS, DEFAULT_ID_COLUMNS, InvalidDelimiter, read_metadata
Expand Down Expand Up @@ -889,21 +890,21 @@ def register_parser(parent_subparsers):
config.add_argument('--maintainers', metavar="name", action="append", nargs='+', help="Analysis maintained by, in format 'Name <URL>' 'Name2 <URL>', ...")
config.add_argument('--build-url', type=str, metavar="url", help="Build URL/repository to be displayed by Auspice")
config.add_argument('--description', metavar="description.md", help="Markdown file with description of build and/or acknowledgements to be displayed by Auspice")
config.add_argument('--geo-resolutions', metavar="trait", nargs='+', help="Geographic traits to be displayed on map")
config.add_argument('--color-by-metadata', metavar="trait", nargs='+', help="Metadata columns to include as coloring options")
config.add_argument('--metadata-columns', nargs="+",
config.add_argument('--geo-resolutions', metavar="trait", nargs='+', action='extend', help="Geographic traits to be displayed on map")
config.add_argument('--color-by-metadata', metavar="trait", nargs='+', action='extend', help="Metadata columns to include as coloring options")
config.add_argument('--metadata-columns', nargs="+", action="extend",
help="Metadata columns to export in addition to columns provided by --color-by-metadata or colorings in the Auspice configuration file. " +
"These columns will not be used as coloring options in Auspice but will be visible in the tree.")
config.add_argument('--panels', metavar="panels", nargs='+', choices=['tree', 'map', 'entropy', 'frequencies', 'measurements'], help="Restrict panel display in auspice. Options are %(choices)s. Ignore this option to display all available panels.")
config.add_argument('--panels', metavar="panels", nargs='+', action='extend', choices=['tree', 'map', 'entropy', 'frequencies', 'measurements'], help="Restrict panel display in auspice. Options are %(choices)s. Ignore this option to display all available panels.")

optional_inputs = parser.add_argument_group(
title="OPTIONAL INPUT FILES"
)
optional_inputs.add_argument('--node-data', metavar="JSON", nargs='+', action="extend", help="JSON files containing metadata for nodes in the tree")
optional_inputs.add_argument('--metadata', metavar="FILE", help="Additional metadata for strains in the tree")
optional_inputs.add_argument('--metadata-delimiters', default=DEFAULT_DELIMITERS, nargs="+",
optional_inputs.add_argument('--metadata-delimiters', default=DEFAULT_DELIMITERS, nargs="+", action=ExtendOverwriteDefault,
help="delimiters to accept when reading a metadata file. Only one delimiter will be inferred.")
optional_inputs.add_argument('--metadata-id-columns', default=DEFAULT_ID_COLUMNS, nargs="+",
optional_inputs.add_argument('--metadata-id-columns', default=DEFAULT_ID_COLUMNS, nargs="+", action=ExtendOverwriteDefault,
help="names of possible metadata columns containing identifier information, ordered by priority. Only one ID column will be inferred.")
optional_inputs.add_argument('--colors', metavar="FILE", help="Custom color definitions, one per line in the format `TRAIT_TYPE\\tTRAIT_VALUE\\tHEX_CODE`")
optional_inputs.add_argument('--lat-longs', metavar="TSV", help="Latitudes and longitudes for geography traits (overrides built in mappings)")
Expand Down
17 changes: 9 additions & 8 deletions augur/filter/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Filter and subsample a sequence set.
"""
from augur.argparse_ import ExtendOverwriteDefault
from augur.dates import numeric_date_type, SUPPORTED_DATE_HELP_TEXT
from augur.filter.io import ACCEPTED_TYPES, column_type_pair
from augur.io.metadata import DEFAULT_DELIMITERS, DEFAULT_ID_COLUMNS, METADATA_DATE_COLUMN
Expand All @@ -19,8 +20,8 @@ def register_arguments(parser):
input_group.add_argument('--sequences', '-s', help="sequences in FASTA or VCF format")
input_group.add_argument('--sequence-index', help="sequence composition report generated by augur index. If not provided, an index will be created on the fly.")
input_group.add_argument('--metadata-chunk-size', type=int, default=100000, help="maximum number of metadata records to read into memory at a time. Increasing this number can speed up filtering at the cost of more memory used.")
input_group.add_argument('--metadata-id-columns', default=DEFAULT_ID_COLUMNS, nargs="+", help="names of possible metadata columns containing identifier information, ordered by priority. Only one ID column will be inferred.")
input_group.add_argument('--metadata-delimiters', default=DEFAULT_DELIMITERS, nargs="+", help="delimiters to accept when reading a metadata file. Only one delimiter will be inferred.")
input_group.add_argument('--metadata-id-columns', default=DEFAULT_ID_COLUMNS, nargs="+", action=ExtendOverwriteDefault, help="names of possible metadata columns containing identifier information, ordered by priority. Only one ID column will be inferred.")
input_group.add_argument('--metadata-delimiters', default=DEFAULT_DELIMITERS, nargs="+", action=ExtendOverwriteDefault, help="delimiters to accept when reading a metadata file. Only one delimiter will be inferred.")

metadata_filter_group = parser.add_argument_group("metadata filters", "filters to apply to metadata")
metadata_filter_group.add_argument(
Expand All @@ -29,7 +30,7 @@ def register_arguments(parser):
Uses Pandas Dataframe querying, see https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-query for syntax.
(e.g., --query "country == 'Colombia'" or --query "(country == 'USA' & (division == 'Washington'))")"""
)
metadata_filter_group.add_argument('--query-columns', type=column_type_pair, nargs="+", help=f"""
metadata_filter_group.add_argument('--query-columns', type=column_type_pair, nargs="+", action="extend", help=f"""
Use alongside --query to specify columns and data types in the format 'column:type', where type is one of ({','.join(ACCEPTED_TYPES)}).
Automatic type inference will be attempted on all unspecified columns used in the query.
Example: region:str coverage:float.
Expand All @@ -38,12 +39,12 @@ def register_arguments(parser):
metadata_filter_group.add_argument('--max-date', type=numeric_date_type, help=f"maximal cutoff for date, the cutoff date is inclusive; may be specified as: {SUPPORTED_DATE_HELP_TEXT}")
metadata_filter_group.add_argument('--exclude-ambiguous-dates-by', choices=['any', 'day', 'month', 'year'],
help='Exclude ambiguous dates by day (e.g., 2020-09-XX), month (e.g., 2020-XX-XX), year (e.g., 200X-10-01), or any date fields. An ambiguous year makes the corresponding month and day ambiguous, too, even if those fields have unambiguous values (e.g., "201X-10-01"). Similarly, an ambiguous month makes the corresponding day ambiguous (e.g., "2010-XX-01").')
metadata_filter_group.add_argument('--exclude', type=str, nargs="+", help="file(s) with list of strains to exclude")
metadata_filter_group.add_argument('--exclude-where', nargs='+',
metadata_filter_group.add_argument('--exclude', type=str, nargs="+", action="extend", help="file(s) with list of strains to exclude")
metadata_filter_group.add_argument('--exclude-where', nargs='+', action='extend',
help="Exclude samples matching these conditions. Ex: \"host=rat\" or \"host!=rat\". Multiple values are processed as OR (matching any of those specified will be excluded), not AND")
metadata_filter_group.add_argument('--exclude-all', action="store_true", help="exclude all strains by default. Use this with the include arguments to select a specific subset of strains.")
metadata_filter_group.add_argument('--include', type=str, nargs="+", help="file(s) with list of strains to include regardless of priorities, subsampling, or absence of an entry in --sequences.")
metadata_filter_group.add_argument('--include-where', nargs='+', help="""
metadata_filter_group.add_argument('--include', type=str, nargs="+", action="extend", help="file(s) with list of strains to include regardless of priorities, subsampling, or absence of an entry in --sequences.")
metadata_filter_group.add_argument('--include-where', nargs='+', action='extend', help="""
Include samples with these values. ex: host=rat. Multiple values are
processed as OR (having any of those specified will be included), not
AND. This rule is applied last and ensures any strains matching these
Expand All @@ -56,7 +57,7 @@ def register_arguments(parser):
sequence_filter_group.add_argument('--non-nucleotide', action='store_true', help="exclude sequences that contain illegal characters")

subsample_group = parser.add_argument_group("subsampling", "options to subsample filtered data")
subsample_group.add_argument('--group-by', nargs='+', help=f"""
subsample_group.add_argument('--group-by', nargs='+', action='extend', help=f"""
categories with respect to subsample.
Notes:
(1) Grouping by {sorted(constants.GROUP_BY_GENERATED_COLUMNS)} is only supported when there is a {METADATA_DATE_COLUMN!r} column in the metadata.
Expand Down
Loading

0 comments on commit 03b49da

Please sign in to comment.