Skip to content

Commit

Permalink
Implement CLI with subcommand pattern (#7)
Browse files Browse the repository at this point in the history
This update includes several changes and new features.

- A new `refr` module to retrieve reference genome sequences for a specified panel of microhaplotype loci (closes #4)
- A switch to a subcommand design for the command-line interface
- Additional tests, including tests that depend on `hg38.fasta`, which is not included in the source code distribution.
- Extensive refactoring of the core code and tests in support of the previous points
  • Loading branch information
standage authored Apr 12, 2019
1 parent 65a5fb0 commit 468b2e3
Show file tree
Hide file tree
Showing 26 changed files with 483 additions and 264 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ microhapulator.egg-info/
hg38.fasta
hg38.fasta.fai
.coverage.*
scratch/
10 changes: 5 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,21 @@ help: Makefile

## test: execute the automated test suite
test:
pytest --cov=microhapulator --doctest-modules microhapulator/*.py
pytest --cov=microhapulator --doctest-modules microhapulator/*.py microhapulator/*/test_*.py

## devdeps: install development dependencies
devdeps:
pip3 install --upgrade pip setuptools
pip3 install wheel twine
pip3 install pycodestyle pytest-cov pytest-sugar
pip install --upgrade pip setuptools
pip install wheel twine
pip install pycodestyle pytest-cov pytest-sugar

## clean: remove development artifacts
clean:
rm -rf __pycache__/ microhapulator/__pycache__/ microhapulator/*/__pycache__ build/ dist/ *.egg-info/

## style: check code style against PEP8
style:
pycodestyle --max-line-length=99 microhapulator/*.py
pycodestyle --max-line-length=99 microhapulator/*.py microhapulator/*/*.py

## refr: download GRCh38 reference genome to current directory and index
refr:
Expand Down
43 changes: 37 additions & 6 deletions microhapulator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,46 @@
# and is licensed under the BSD license: see LICENSE.txt.
# -----------------------------------------------------------------------------

# Core libraries
import builtins
from gzip import open as gzopen
from sys import stdin, stdout, stderr

from . import cli
from . import locus
from . import population
from microhapulator.util import data_file, bogus_loci, bogus_index
from microhapulator.context import LocusContext
from microhapulator.genotype import Genotype
# Internal modules
from microhapulator import locus
from microhapulator import population

# Subcommands and command-line interface
from microhapulator import refr
from microhapulator import sim
from microhapulator import cli


# Look up the package version string, then remove the helper function from the
# package namespace so that only `__version__` remains exposed.
from ._version import get_versions
__version__ = get_versions()['version']
del get_versions


# Stream that plog() writes diagnostic messages to; when None, plog() writes to
# stderr instead.
logstream = None
# When true, plog() echoes each message to stderr in addition to `logstream`.
teelog = False


def open(filename, mode):
    """Open a file or standard stream for text reading or writing.

    A filename of `-` or `None` selects standard input (mode `r`) or standard
    output (mode `w`). Filenames ending in `.gz` are opened with gzip in text
    mode; anything else is opened with the built-in `open`.

    Raises a ValueError if `mode` is anything other than `r` or `w`.
    """
    if mode != 'r' and mode != 'w':
        raise ValueError('invalid mode "{}"'.format(mode))
    if filename is None or filename == '-':
        return stdout if mode == 'w' else stdin
    if filename.endswith('.gz'):
        # gzip defaults to binary mode, so text mode is requested explicitly
        return gzopen(filename, mode + 't')
    return builtins.open(filename, mode)


def plog(*args, **kwargs):
    """Print a logging message to the configured destination(s).

    Positional and keyword arguments are forwarded to `print`. The message is
    written to the module-level `logstream` when one is set; it is written to
    stderr when no `logstream` is set or when `teelog` is enabled.
    """
    destinations = []
    if logstream is not None:
        destinations.append(logstream)
    if logstream is None or teelog:
        destinations.append(stderr)
    for stream in destinations:
        print(*args, **kwargs, file=stream)
29 changes: 29 additions & 0 deletions microhapulator/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env python3
#
# -----------------------------------------------------------------------------
# Copyright (c) 2019, Battelle National Biodefense Institute.
#
# This file is part of MicroHapulator (github.com/bioforensics/microhapulator)
# and is licensed under the BSD license: see LICENSE.txt.
# -----------------------------------------------------------------------------

import microhapulator


def main(arglist=None):
    """Entry point for the MicroHapulator CLI.

    Isolated as a method so that the CLI can be called by other Python code
    (e.g. for testing), in which case the arguments are passed to the function.
    If no arguments are passed to the function, parse them from the command
    line.

    The parsed subcommand name is used to look up the corresponding main
    method in `microhapulator.cli.mains`, which is then invoked with the
    parsed arguments.
    """
    args = microhapulator.cli.parse_args(arglist)
    if args.cmd is None:  # pragma: no cover
        # No subcommand was given: show the top-level help message. The cli
        # package exposes the parser factory as `get_parser`; argparse exits
        # the program after printing help.
        microhapulator.cli.get_parser().parse_args(['-h'])

    # Sanity check: every subcommand registered with the parser must have an
    # entry in the dispatch table.
    assert args.cmd in microhapulator.cli.mains
    mainmethod = microhapulator.cli.mains[args.cmd]
    versionmessage = '[MicroHapulator] running version {}'.format(microhapulator.__version__)
    microhapulator.plog(versionmessage)
    mainmethod(args)
60 changes: 60 additions & 0 deletions microhapulator/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# -----------------------------------------------------------------------------
# Copyright (c) 2019, Battelle National Biodefense Institute.
#
# This file is part of MicroHapulator (github.com/bioforensics/microhapulator)
# and is licensed under the BSD license: see LICENSE.txt.
# -----------------------------------------------------------------------------

from argparse import ArgumentParser, RawDescriptionHelpFormatter
import microhapulator
from sys import stderr
from . import refr
from . import sim

# Dispatch table: subcommand name -> function that executes that subcommand.
mains = {
'refr': microhapulator.refr.main,
'sim': microhapulator.sim.main,
}

# Subcommand name -> function that registers that subcommand's argument parser
# on an argparse subparsers object (called from get_parser).
subparser_funcs = {
'refr': refr.subparser,
'sim': sim.subparser,
}


def get_parser():
    """Build and return the top-level MicroHapulator argument parser.

    The parser defines the global options (`--version`, `--logfile`, `--tee`)
    and one subparser per entry in `subparser_funcs`; the name of the selected
    subcommand is stored in the `cmd` attribute of the parsed arguments.
    """
    # NOTE(review): banner reproduced as it appears in the reviewed listing;
    # confirm any leading whitespace against the original file.
    banner = r'''
≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠
MicroHapulator
≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠≠
'''
    # Quoted, comma-separated list of subcommand names for the help message
    quoted_names = '"{}"'.format('", "'.join(sorted(mains)))
    version_text = 'MicroHapulator v{}'.format(microhapulator.__version__)

    top = ArgumentParser(description=banner, formatter_class=RawDescriptionHelpFormatter)
    # Rename argparse's default help section headings
    top._positionals.title = 'Subcommands'
    top._optionals.title = 'Global arguments'

    top.add_argument('-v', '--version', action='version', version=version_text)
    top.add_argument(
        '-l', '--logfile', metavar='F',
        help='log file for diagnostic messages, warnings, and errors',
    )
    top.add_argument(
        '--tee', action='store_true',
        help='write diagnostic output to logfile AND terminal (stderr)',
    )

    commands = top.add_subparsers(dest='cmd', metavar='cmd', help=quoted_names)
    for register in subparser_funcs.values():
        register(commands)
    return top


def parse_args(arglist=None):
    """Parse command-line arguments and configure logging.

    `arglist` is a list of argument strings; if it is None, argparse reads the
    arguments from the command line. Returns the parsed arguments.

    As a side effect, `microhapulator.logstream` is set to stderr, or to a
    newly opened file when `--logfile` names something other than `-`. The
    file handle is not closed here. `microhapulator.teelog` is enabled only
    when `--tee` is given AND messages are going to a real log file: when the
    log stream is already stderr, teeing would print every message to the
    terminal twice.
    """
    args = get_parser().parse_args(arglist)
    log_to_file = bool(args.logfile) and args.logfile != '-'
    microhapulator.logstream = open(args.logfile, 'w') if log_to_file else stderr
    microhapulator.teelog = args.tee and log_to_file
    return args
33 changes: 33 additions & 0 deletions microhapulator/cli/refr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env python3
#
# -----------------------------------------------------------------------------
# Copyright (c) 2019, Battelle National Biodefense Institute.
#
# This file is part of MicroHapulator (github.com/bioforensics/microhapulator)
# and is licensed under the BSD license: see LICENSE.txt.
# -----------------------------------------------------------------------------


def subparser(subparsers):
    """Register the `refr` subcommand and its arguments on `subparsers`.

    `subparsers` is the object returned by `ArgumentParser.add_subparsers`.
    """
    refr_parser = subparsers.add_parser('refr')
    # (flags or name, keyword settings) for each argument, in registration order
    argument_specs = (
        (('-o', '--out'), {
            'metavar': 'FILE',
            'help': 'write output to "FILE"; by default, output is written to the '
                    'terminal (standard output)',
        }),
        (('-d', '--delta'), {
            'type': int,
            'default': 30,
            'metavar': 'Δ',
            'help': 'extend each microhap locus by Δ nucleotides',
        }),
        (('-m', '--min-length'), {
            'type': int,
            'default': 350,
            'metavar': 'M',
            'help': 'after applying deltas, if a microhap locus is shorter than M '
                    'nucleotides, extend both sides equally so that it is M '
                    'nucleotides in length',
        }),
        (('refrfasta',), {
            'help': 'reference genome file',
        }),
        (('panel',), {
            'nargs': '*',
            'help': 'list of MicroHapDB locus IDs; by default, a panel of 22 ALFRED '
                    'microhaplotype loci is used',
        }),
    )
    for names, settings in argument_specs:
        refr_parser.add_argument(*names, **settings)
63 changes: 63 additions & 0 deletions microhapulator/cli/sim.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env python3
#
# -----------------------------------------------------------------------------
# Copyright (c) 2019, Battelle National Biodefense Institute.
#
# This file is part of MicroHapulator (github.com/bioforensics/microhapulator)
# and is licensed under the BSD license: see LICENSE.txt.
# -----------------------------------------------------------------------------


def subparser(subparsers):
    """Register the `sim` subcommand and its arguments on `subparsers`.

    `subparsers` is the object returned by `ArgumentParser.add_subparsers`.
    Options are organized into argument groups for haplotype simulation,
    targeted sequencing, and output configuration.
    """
    cli = subparsers.add_parser('sim')
    # Rename argparse's default help section headings
    cli._positionals.title = 'Input configuration'
    cli._optionals.title = 'Miscellaneous'

    hapargs = cli.add_argument_group('Haplotype simulation')
    hapargs.add_argument(
        '--panel', nargs='+', metavar='ID', help='list of MicroHapDB locus '
        'IDs for which to simulate data; by default, a panel of 22 ALFRED '
        'microhaplotype loci is used'
    )
    hapargs.add_argument(
        '-r', '--relaxed', action='store_true', help='if a locus in the panel '
        'has no frequency data for a requested population, randomly draw an '
        'allele (from a uniform distribution) from all possible alleles; by '
        'default, these loci are excluded from simulation'
    )
    hapargs.add_argument(
        '--hap-seed', type=int, default=None, metavar='INT', help='random '
        'seed for simulating haplotypes'
    )

    seqargs = cli.add_argument_group('Targeted sequencing')
    seqargs.add_argument(
        '-n', '--num-reads', type=int, default=500000, metavar='N',
        help='number of reads to simulate; default is 500000'
    )
    seqargs.add_argument(
        '--seq-seed', type=int, default=None, metavar='INT', help='random '
        'seed for simulated sequencing'
    )
    seqargs.add_argument(
        '--seq-threads', type=int, default=None, metavar='INT', help='number '
        'of threads to use when simulating targeted amplicon sequencing'
    )

    outargs = cli.add_argument_group('Output configuration')
    outargs.add_argument(
        '-o', '--out', metavar='FILE', required=True,
        help='write simulated MiSeq reads in FASTQ format to FILE; use '
        '`/dev/stdout` to write reads to standard output'
    )
    outargs.add_argument(
        '--genotype', metavar='FILE', help='write simulated genotype data in '
        'BED format to FILE'
    )
    outargs.add_argument(
        '--haploseq', metavar='FILE', help='write simulated haplotype '
        'sequences in FASTA format to FILE'
    )

    cli.add_argument('refr', help='reference genome file')
    cli.add_argument('popid', nargs='+', help='population ID(s)')

    # Swap the built-in optionals group ("Miscellaneous") with the last group
    # ("Output configuration") so that the miscellaneous options are listed
    # last in the help message.
    cli._action_groups[1], cli._action_groups[-1] = cli._action_groups[-1], cli._action_groups[1]
70 changes: 0 additions & 70 deletions microhapulator/context.py

This file was deleted.

Loading

0 comments on commit 468b2e3

Please sign in to comment.