-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement CLI with subcommand pattern (#7)
This update includes several changes and new features. - A new `refr` module to retrieve reference genome sequences for a specified panel of microhaplotype loci (closes #4) - A switch to a subcommand design for the command-line interface - Additional tests, including tests that depend on `hg38.fasta` not included in the source code distribution. - Extensive refactoring of the core code and tests in support of the previous points
- Loading branch information
Showing
26 changed files
with
483 additions
and
264 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,3 +7,4 @@ microhapulator.egg-info/ | |
hg38.fasta | ||
hg38.fasta.fai | ||
.coverage.* | ||
scratch/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#!/usr/bin/env python3 | ||
# | ||
# ----------------------------------------------------------------------------- | ||
# Copyright (c) 2019, Battelle National Biodefense Institute. | ||
# | ||
# This file is part of MicroHapulator (github.com/bioforensics/microhapulator) | ||
# and is licensed under the BSD license: see LICENSE.txt. | ||
# ----------------------------------------------------------------------------- | ||
|
||
import microhapulator | ||
|
||
|
||
def main(arglist=None):
    """Entry point for the MicroHapulator CLI.

    Isolated as a function so that the CLI can be called by other Python code
    (e.g. for testing), in which case the arguments are passed to the
    function. If no arguments are passed to the function, they are parsed
    from the command line.

    The selected subcommand name (``args.cmd``) is looked up in
    ``microhapulator.cli.mains`` and the matching function is invoked with
    the parsed arguments.
    """
    args = microhapulator.cli.parse_args(arglist)
    if args.cmd is None:  # pragma: no cover
        # No subcommand was given: fall back to the top-level help message.
        # The parser factory in the cli module is named `get_parser`.
        microhapulator.cli.get_parser().parse_args(['-h'])

    # Internal invariant: every subcommand registered with the parser must
    # have a corresponding entry in the dispatch table.
    assert args.cmd in microhapulator.cli.mains
    mainmethod = microhapulator.cli.mains[args.cmd]
    versionmessage = '[MicroHapulator] running version {}'.format(microhapulator.__version__)
    microhapulator.plog(versionmessage)
    mainmethod(args)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding: utf-8 -*- | ||
# | ||
# ----------------------------------------------------------------------------- | ||
# Copyright (c) 2019, Battelle National Biodefense Institute. | ||
# | ||
# This file is part of MicroHapulator (github.com/bioforensics/microhapulator) | ||
# and is licensed under the BSD license: see LICENSE.txt. | ||
# ----------------------------------------------------------------------------- | ||
|
||
from argparse import ArgumentParser, RawDescriptionHelpFormatter | ||
import microhapulator | ||
from sys import stderr | ||
from . import refr | ||
from . import sim | ||
|
||
# Dispatch table: subcommand name -> function that runs that subcommand with
# the parsed arguments.
mains = dict(
    refr=microhapulator.refr.main,
    sim=microhapulator.sim.main,
)

# Subcommand name -> function that registers that subcommand's argument
# parser on the object returned by `add_subparsers()`.
subparser_funcs = dict(
    refr=refr.subparser,
    sim=sim.subparser,
)
|
||
|
||
def get_parser():
    """Build and return the top-level MicroHapulator argument parser.

    The parser defines the global options (version, log file, tee) and
    registers one subparser for each entry in ``subparser_funcs``. The name
    of the selected subcommand is stored in the ``cmd`` attribute of the
    parsed namespace.
    """
    banner = r'''
≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠
MicroHapulator
≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠
'''
    cli = ArgumentParser(description=banner, formatter_class=RawDescriptionHelpFormatter)

    # Relabel argparse's default section headings in the help output.
    cli._positionals.title = 'Subcommands'
    cli._optionals.title = 'Global arguments'

    version_text = 'MicroHapulator v{}'.format(microhapulator.__version__)
    cli.add_argument('-v', '--version', action='version', version=version_text)
    cli.add_argument(
        '-l', '--logfile', metavar='F',
        help='log file for diagnostic messages, warnings, and errors',
    )
    cli.add_argument(
        '--tee', action='store_true',
        help='write diagnostic output to logfile AND terminal (stderr)',
    )

    # Help text lists the available subcommands, each in double quotes.
    quoted_names = '"{}"'.format('", "'.join(sorted(mains)))
    commands = cli.add_subparsers(dest='cmd', metavar='cmd', help=quoted_names)
    for register in subparser_funcs.values():
        register(commands)
    return cli
|
||
|
||
def parse_args(arglist=None):
    """Parse CLI arguments and configure the package-level log settings.

    Sets ``microhapulator.logstream`` to standard error, or to a newly opened
    file when a log file other than ``-`` is requested, and copies the
    ``--tee`` flag to ``microhapulator.teelog``.

    Returns the parsed argument namespace.
    """
    parsed = get_parser().parse_args(arglist)
    logpath = parsed.logfile
    microhapulator.logstream = stderr
    if logpath not in (None, '', '-'):
        # The handle is intentionally left open: it serves as the log stream.
        microhapulator.logstream = open(logpath, 'w')
    microhapulator.teelog = parsed.tee
    return parsed
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
#!/usr/bin/env python3 | ||
# | ||
# ----------------------------------------------------------------------------- | ||
# Copyright (c) 2019, Battelle National Biodefense Institute. | ||
# | ||
# This file is part of MicroHapulator (github.com/bioforensics/microhapulator) | ||
# and is licensed under the BSD license: see LICENSE.txt. | ||
# ----------------------------------------------------------------------------- | ||
|
||
|
||
def subparser(subparsers):
    """Register the ``refr`` subcommand and its arguments.

    ``subparsers`` is the object returned by
    ``ArgumentParser.add_subparsers()``. The new parser accepts the options
    ``-o/--out``, ``-d/--delta`` and ``-m/--min-length``, one required
    positional ``refrfasta``, and zero or more positional ``panel`` values.
    """
    refr_parser = subparsers.add_parser('refr')
    argument_specs = (
        (('-o', '--out'), dict(
            metavar='FILE',
            help='write output to "FILE"; by default, output is written to '
                 'the terminal (standard output)',
        )),
        (('-d', '--delta'), dict(
            type=int, default=30, metavar='Δ',
            help='extend each microhap locus by Δ nucleotides',
        )),
        (('-m', '--min-length'), dict(
            type=int, default=350, metavar='M',
            help='after applying deltas, if a microhap locus is shorter '
                 'than M nucleotides, extend both sides equally so that it '
                 'is M nucleotides in length',
        )),
        (('refrfasta',), dict(help='reference genome file')),
        (('panel',), dict(
            nargs='*',
            help='list of MicroHapDB locus IDs; by default, a panel of 22 '
                 'ALFRED microhaplotype loci is used',
        )),
    )
    # Registration order matters: it fixes the order of the positionals.
    for names, options in argument_specs:
        refr_parser.add_argument(*names, **options)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
#!/usr/bin/env python3 | ||
# | ||
# ----------------------------------------------------------------------------- | ||
# Copyright (c) 2019, Battelle National Biodefense Institute. | ||
# | ||
# This file is part of MicroHapulator (github.com/bioforensics/microhapulator) | ||
# and is licensed under the BSD license: see LICENSE.txt. | ||
# ----------------------------------------------------------------------------- | ||
|
||
|
||
def subparser(subparsers):
    """Register the ``sim`` subcommand and its arguments.

    ``subparsers`` is the object returned by
    ``ArgumentParser.add_subparsers()``. Options are organized into titled
    groups in the help output: haplotype simulation, targeted sequencing,
    and output configuration. The positionals are the reference genome file
    (``refr``) followed by one or more population IDs (``popid``).
    """
    cli = subparsers.add_parser('sim')
    # Relabel argparse's default section headings in the help output.
    cli._positionals.title = 'Input configuration'
    cli._optionals.title = 'Miscellaneous'

    hapargs = cli.add_argument_group('Haplotype simulation')
    hapargs.add_argument(
        '--panel', nargs='+', metavar='ID', help='list of MicroHapDB locus '
        'IDs for which to simulate data; by default, a panel of 22 ALFRED '
        'microhaplotype loci is used'
    )
    hapargs.add_argument(
        '-r', '--relaxed', action='store_true', help='if a locus in the panel '
        'has no frequency data for a requested population, randomly draw an '
        'allele (from a uniform distribution) from all possible alleles; by '
        'default, these loci are excluded from simulation'
    )
    hapargs.add_argument(
        '--hap-seed', type=int, default=None, metavar='INT', help='random '
        'seed for simulating haplotypes'
    )

    seqargs = cli.add_argument_group('Targeted sequencing')
    seqargs.add_argument(
        '-n', '--num-reads', type=int, default=500000, metavar='N',
        help='number of reads to simulate; default is 500000'
    )
    seqargs.add_argument(
        '--seq-seed', type=int, default=None, metavar='INT', help='random '
        'seed for simulated sequencing'
    )
    seqargs.add_argument(
        '--seq-threads', type=int, default=None, metavar='INT', help='number '
        'of threads to use when simulating targeted amplicon sequencing'
    )

    outargs = cli.add_argument_group('Output configuration')
    outargs.add_argument(
        '-o', '--out', metavar='FILE', required=True,
        help='write simulated MiSeq reads in FASTQ format to FILE; use '
        '`/dev/stdout` to write reads to standard output'
    )
    outargs.add_argument(
        '--genotype', metavar='FILE', help='write simulated genotype data in '
        'BED format to FILE'
    )
    outargs.add_argument(
        '--haploseq', metavar='FILE', help='write simulated haplotype '
        'sequences in FASTA format to FILE'
    )

    cli.add_argument('refr', help='reference genome file')
    cli.add_argument('popid', nargs='+', help='population ID(s)')

    # Swap the default optionals group (index 1, titled 'Miscellaneous') with
    # the most recently added group so that 'Miscellaneous' is listed last in
    # the help output.
    cli._action_groups[1], cli._action_groups[-1] = cli._action_groups[-1], cli._action_groups[1]
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.