Skip to content

Commit

Permalink
WIP: Low hanging items (#824)
Browse files Browse the repository at this point in the history
* TST: remove detailed report lines per #378

* MAINT: remove detailed report

* DOC: note the change

* BLD: remove py27 support, shift sphinx docs to 3.5

* API/DOC: fixes #685 changing a default parameter to Table.nonzero_counts, and noting in the documentation

* API: allow open file handles to be passed to load_table

* DOC: changelog mention
  • Loading branch information
wasade authored and ElDeveloper committed Aug 30, 2019
1 parent 38e64fc commit 39e9881
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 53 deletions.
6 changes: 2 additions & 4 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# Modified from https://github.com/biocore/scikit-bio/
language: python
env:
- PYTHON_VERSION=2.7 WITH_DOCTEST=False USE_CYTHON=True
- PYTHON_VERSION=3.5 WITH_DOCTEST=True USE_CYTHON=True
- PYTHON_VERSION=3.6 WITH_DOCTEST=True USE_CYTHON=True
- PYTHON_VERSION=3.7 WITH_DOCTEST=True USE_CYTHON=True
Expand All @@ -14,14 +13,13 @@ install:
- conda create --yes -n env_name python=$PYTHON_VERSION pip click numpy scipy pep8 flake8 coverage future six "pandas>=0.20.0" nose h5py>=2.2.0 cython
- rm biom/*.c
- source activate env_name
- if [ ${PYTHON_VERSION} = "2.7" ]; then pip install pyqi; fi
- if [ ${PYTHON_VERSION} = "2.7" ]; then conda install --yes Sphinx=1.2.2; fi
- if [ ${PYTHON_VERSION} = "3.5" ]; then pip install sphinx==1.2.2; fi
- pip install coveralls
- pip install -e . --no-deps
script:
- make test
- biom show-install-info
- if [ ${PYTHON_VERSION} = "2.7" ]; then make -C doc html; fi
- if [ ${PYTHON_VERSION} = "3.5" ]; then make -C doc html; fi
# we can only validate the tables if we have H5PY
- for table in examples/*hdf5.biom; do echo ${table}; biom validate-table -i ${table}; done
# validate JSON formatted tables
Expand Down
8 changes: 8 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,16 @@ biom 2.1.7-dev

New changes since 2.1.7 go here.

Important:

* Python 2.7 support has been dropped.
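* The default for the `binary` parameter of `Table.nonzero_counts` has changed from `False` to `True`, so the method now counts the number of nonzero features by default instead of summing their values (pass `binary=False` to get the previous behavior). See [issue #685](https://github.com/biocore/biom-format/issues/685).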

New Features:

* The detailed report is no longer part of the table validator. See [issue #378](https://github.com/biocore/biom-format/issues/378).
* `load_table` now accepts open file handles. See [issue #481](https://github.com/biocore/biom-format/issues/481).

Bug fixes:

* `Table.to_dataframe(dense=False)` now correctly produces sparse data frames (rather than accidentally dense ones, as before). See [issue #808](https://github.com/biocore/biom-format/issues/808).
Expand Down
45 changes: 6 additions & 39 deletions biom/cli/table_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,7 @@
' specification')
@click.option('-f', '--format-version', default=None,
help='The specific format version to validate against')
@click.option('--detailed-report', is_flag=True, default=False,
help='Include more details in the output report')
def validate_table(input_fp, format_version, detailed_report):
def validate_table(input_fp, format_version):
"""Validate a BIOM-formatted file.
Test a file for adherence to the Biological Observation Matrix (BIOM)
Expand All @@ -46,7 +44,7 @@ def validate_table(input_fp, format_version, detailed_report):
$ biom validate-table -i table.biom
"""
valid, report = _validate_table(input_fp, format_version, detailed_report)
valid, report = _validate_table(input_fp, format_version)
click.echo("\n".join(report))
if valid:
# apparently silence is too quiet to be golden.
Expand All @@ -57,9 +55,8 @@ def validate_table(input_fp, format_version, detailed_report):
sys.exit(1)


def _validate_table(input_fp, format_version=None, detailed_report=False):
result = TableValidator()(table=input_fp, format_version=format_version,
detailed_report=detailed_report)
def _validate_table(input_fp, format_version=None):
result = TableValidator()(table=input_fp, format_version=format_version)
return result['valid_table'], result['report_lines']


Expand Down Expand Up @@ -108,23 +105,15 @@ def run(self, **kwargs):
raise IOError("h5py is not installed, can only validate JSON "
"tables")

def __call__(self, table, format_version=None, detailed_report=False):
return self.run(table=table, format_version=format_version,
detailed_report=detailed_report)
def __call__(self, table, format_version=None):
return self.run(table=table, format_version=format_version)

def _validate_hdf5(self, **kwargs):
table = kwargs['table']

# Need to make this an attribute so that we have this info during
# validation.
detailed_report = kwargs['detailed_report']

report_lines = []
valid_table = True

if detailed_report:
report_lines.append("Validating BIOM table...")

required_attrs = [
('format-url', self._valid_format_url),
('format-version', self._valid_hdf5_format_version),
Expand Down Expand Up @@ -154,9 +143,6 @@ def _validate_hdf5(self, **kwargs):
report_lines.append("Missing attribute: '%s'" % required_attr)
continue

if detailed_report:
report_lines.append("Validating '%s'..." % required_attr)

status_msg = attr_validator(table)

if len(status_msg) > 0:
Expand All @@ -166,20 +152,12 @@ def _validate_hdf5(self, **kwargs):
for group in required_groups:
if group not in table:
valid_table = False
if detailed_report:
report_lines.append("Missing group: %s" % group)

for dataset in required_datasets:
if dataset not in table:
valid_table = False
if detailed_report:
report_lines.append("Missing dataset: %s" % dataset)

if 'shape' in table.attrs:
if detailed_report:
report_lines.append("Validating 'shape' versus number of "
"samples and observations...")

n_obs, n_samp = table.attrs['shape']
obs_ids = table.get('observation/ids', None)
samp_ids = table.get('sample/ids', None)
Expand Down Expand Up @@ -270,14 +248,10 @@ def _validate_json(self, **kwargs):
# Need to make this an attribute so that we have this info during
# validation.
self._format_version = kwargs['format_version']
detailed_report = kwargs['detailed_report']

report_lines = []
valid_table = True

if detailed_report:
report_lines.append("Validating BIOM table...")

required_keys = [
('format', self._valid_format),
('format_url', self._valid_format_url),
Expand All @@ -299,20 +273,13 @@ def _validate_json(self, **kwargs):
report_lines.append("Missing field: '%s'" % key)
continue

if detailed_report:
report_lines.append("Validating '%s'..." % key)

status_msg = method(table_json)

if len(status_msg) > 0:
valid_table = False
report_lines.append(status_msg)

if 'shape' in table_json:
if detailed_report:
report_lines.append("Validating 'shape' versus number of rows "
"and columns...")

if ('rows' in table_json and
len(table_json['rows']) != table_json['shape'][0]):
valid_table = False
Expand Down
12 changes: 10 additions & 2 deletions biom/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

import numpy as np
from future.utils import string_types
import io
import h5py

from biom.exception import BiomParseException, UnknownAxisError
from biom.table import Table
Expand Down Expand Up @@ -658,9 +660,15 @@ def load_table(f):
>>> table = load_table('path/to/table.biom') # doctest: +SKIP
"""
with biom_open(f) as fp:
if isinstance(f, (io.IOBase, h5py.File)):
try:
table = parse_biom_table(fp)
table = parse_biom_table(f)
except (IndexError, TypeError):
raise TypeError("%s does not appear to be a BIOM file!" % f)
else:
with biom_open(f) as fp:
try:
table = parse_biom_table(fp)
except (IndexError, TypeError):
raise TypeError("%s does not appear to be a BIOM file!" % f)
return table
4 changes: 2 additions & 2 deletions biom/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -3103,15 +3103,15 @@ def nonzero(self):
for col_idx in indices[start:end]:
yield (obs_id, samp_ids[col_idx])

def nonzero_counts(self, axis, binary=False):
def nonzero_counts(self, axis, binary=True):
"""Get nonzero summaries about an axis
Parameters
----------
axis : {'sample', 'observation', 'whole'}
The axis on which to count nonzero entries
binary : bool, optional
Defaults to ``False``. If ``True``, return number of nonzero
Defaults to ``True``. If ``True``, return number of nonzero
entries. If ``False``, sum the values of the entries.
Returns
Expand Down
3 changes: 1 addition & 2 deletions biom/tests/test_cli/test_validate_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,8 @@ def test_valid(self):
f.close()
self.to_remove.append('valid_test3')

obs = self.cmd(table='valid_test3', detailed_report=True)
obs = self.cmd(table='valid_test3')
self.assertTrue(obs['valid_table'])
self.assertTrue(len(obs['report_lines']) > 0)

def test_invalid(self):
"""Correctly invalidates a table that is... invalid."""
Expand Down
29 changes: 28 additions & 1 deletion biom/tests/test_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
import numpy as np
import numpy.testing as npt

from biom.parse import generatedby, MetadataMap, parse_biom_table, parse_uc
from biom.parse import (generatedby, MetadataMap, parse_biom_table, parse_uc,
load_table)
from biom.table import Table
from biom.util import HAVE_H5PY, __version__
from biom.tests.long_lines import (uc_empty, uc_invalid_id, uc_minimal,
Expand Down Expand Up @@ -237,6 +238,32 @@ def test_parse_biom_table_hdf5(self):
Table.from_hdf5(h5py.File('test_data/test.biom'))
os.chdir(cwd)

@npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed')
def test_load_table_filepath(self):
    """load_table accepts a path to the ``test_data/test.biom`` fixture."""
    cwd = os.getcwd()
    # the fixture path is relative to this test module's directory
    if '/' in __file__[1:]:
        os.chdir(__file__.rsplit('/', 1)[0])
    try:
        load_table('test_data/test.biom')
    finally:
        # always restore the working directory, otherwise a failure here
        # leaves later tests running from the wrong directory
        os.chdir(cwd)

@npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed')
def test_load_table_inmemory(self):
    """load_table accepts an already-open ``h5py.File`` handle."""
    cwd = os.getcwd()
    # the fixture path is relative to this test module's directory
    if '/' in __file__[1:]:
        os.chdir(__file__.rsplit('/', 1)[0])
    try:
        # open read-only and close deterministically so the handle does
        # not leak past the end of the test
        with h5py.File('test_data/test.biom', 'r') as fp:
            load_table(fp)
    finally:
        # always restore the working directory, otherwise a failure here
        # leaves later tests running from the wrong directory
        os.chdir(cwd)

def test_load_table_inmemory_json(self):
    """load_table accepts an already-open text file handle."""
    cwd = os.getcwd()
    # the fixture path is relative to this test module's directory
    if '/' in __file__[1:]:
        os.chdir(__file__.rsplit('/', 1)[0])
    try:
        # the context manager closes the handle even if loading fails
        with open('test_data/test.json') as fp:
            load_table(fp)
    finally:
        # always restore the working directory, otherwise a failure here
        # leaves later tests running from the wrong directory
        os.chdir(cwd)

def test_load_table_inmemory_stringio(self):
    """load_table accepts a ``StringIO`` built from a test fixture.

    The buffer holds the ``classic_otu_table1_no_tax`` fixture joined
    with newlines.
    """
    text = '\n'.join(self.classic_otu_table1_no_tax)
    buffer = StringIO(text)
    load_table(buffer)

def test_parse_biom_table(self):
"""tests for parse_biom_table when we do not have h5py"""
# This is a TSV as a list of lines
Expand Down
6 changes: 3 additions & 3 deletions biom/tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2235,9 +2235,9 @@ def test_nonzero_counts(self):
exp_obs = np.array([14, 15, 0])
exp_whole = np.array([29])

obs_samp = st.nonzero_counts('sample')
obs_obs = st.nonzero_counts('observation')
obs_whole = st.nonzero_counts('whole')
obs_samp = st.nonzero_counts('sample', binary=False)
obs_obs = st.nonzero_counts('observation', binary=False)
obs_whole = st.nonzero_counts('whole', binary=False)

npt.assert_equal(obs_samp, exp_samp)
npt.assert_equal(obs_obs, exp_obs)
Expand Down

0 comments on commit 39e9881

Please sign in to comment.