diff --git a/.travis.yml b/.travis.yml index ff3a50c8..bc6760e2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,6 @@ # Modified from https://github.com/biocore/scikit-bio/ language: python env: - - PYTHON_VERSION=2.7 WITH_DOCTEST=False USE_CYTHON=True - PYTHON_VERSION=3.5 WITH_DOCTEST=True USE_CYTHON=True - PYTHON_VERSION=3.6 WITH_DOCTEST=True USE_CYTHON=True - PYTHON_VERSION=3.7 WITH_DOCTEST=True USE_CYTHON=True @@ -14,14 +13,13 @@ install: - conda create --yes -n env_name python=$PYTHON_VERSION pip click numpy scipy pep8 flake8 coverage future six "pandas>=0.20.0" nose h5py>=2.2.0 cython - rm biom/*.c - source activate env_name - - if [ ${PYTHON_VERSION} = "2.7" ]; then pip install pyqi; fi - - if [ ${PYTHON_VERSION} = "2.7" ]; then conda install --yes Sphinx=1.2.2; fi + - if [ ${PYTHON_VERSION} = "3.5" ]; then pip install sphinx==1.2.2; fi - pip install coveralls - pip install -e . --no-deps script: - make test - biom show-install-info - - if [ ${PYTHON_VERSION} = "2.7" ]; then make -C doc html; fi + - if [ ${PYTHON_VERSION} = "3.5" ]; then make -C doc html; fi # we can only validate the tables if we have H5PY - for table in examples/*hdf5.biom; do echo ${table}; biom validate-table -i ${table}; done # validate JSON formatted tables diff --git a/ChangeLog.md b/ChangeLog.md index 3c006d5b..c41d55a1 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -6,8 +6,16 @@ biom 2.1.7-dev New changes since 2.1.7 go here. +Important: + +* Python 2.7 support has been dropped. +* The default of the `binary` argument to `Table.nonzero_counts` changed from `False` to `True`, so the method now counts nonzero entries by default instead of summing their values. See [issue #685](https://github.com/biocore/biom-format/issues/685). + New Features: +* The detailed report is no longer part of the table validator. See [issue #378](https://github.com/biocore/biom-format/issues/378). +* `load_table` now accepts open file handles. See [issue #481](https://github.com/biocore/biom-format/issues/481). 
+ Bug fixes: * `Table.to_dataframe(dense=False)` does now correctly produce sparse data frames (and not accidentally dense ones as before). See [issue #808](https://github.com/biocore/biom-format/issues/808). diff --git a/biom/cli/table_validator.py b/biom/cli/table_validator.py index a6478a08..09e6639d 100644 --- a/biom/cli/table_validator.py +++ b/biom/cli/table_validator.py @@ -29,9 +29,7 @@ ' specification') @click.option('-f', '--format-version', default=None, help='The specific format version to validate against') -@click.option('--detailed-report', is_flag=True, default=False, - help='Include more details in the output report') -def validate_table(input_fp, format_version, detailed_report): +def validate_table(input_fp, format_version): """Validate a BIOM-formatted file. Test a file for adherence to the Biological Observation Matrix (BIOM) @@ -46,7 +44,7 @@ def validate_table(input_fp, format_version, detailed_report): $ biom validate-table -i table.biom """ - valid, report = _validate_table(input_fp, format_version, detailed_report) + valid, report = _validate_table(input_fp, format_version) click.echo("\n".join(report)) if valid: # apparently silence is too quiet to be golden. 
@@ -57,9 +55,8 @@ def validate_table(input_fp, format_version, detailed_report): sys.exit(1) -def _validate_table(input_fp, format_version=None, detailed_report=False): - result = TableValidator()(table=input_fp, format_version=format_version, - detailed_report=detailed_report) +def _validate_table(input_fp, format_version=None): + result = TableValidator()(table=input_fp, format_version=format_version) return result['valid_table'], result['report_lines'] @@ -108,23 +105,15 @@ def run(self, **kwargs): raise IOError("h5py is not installed, can only validate JSON " "tables") - def __call__(self, table, format_version=None, detailed_report=False): - return self.run(table=table, format_version=format_version, - detailed_report=detailed_report) + def __call__(self, table, format_version=None): + return self.run(table=table, format_version=format_version) def _validate_hdf5(self, **kwargs): table = kwargs['table'] - # Need to make this an attribute so that we have this info during - # validation. - detailed_report = kwargs['detailed_report'] - report_lines = [] valid_table = True - if detailed_report: - report_lines.append("Validating BIOM table...") - required_attrs = [ ('format-url', self._valid_format_url), ('format-version', self._valid_hdf5_format_version), @@ -154,9 +143,6 @@ def _validate_hdf5(self, **kwargs): report_lines.append("Missing attribute: '%s'" % required_attr) continue - if detailed_report: - report_lines.append("Validating '%s'..." 
% required_attr) - status_msg = attr_validator(table) if len(status_msg) > 0: @@ -166,20 +152,12 @@ def _validate_hdf5(self, **kwargs): for group in required_groups: if group not in table: valid_table = False - if detailed_report: - report_lines.append("Missing group: %s" % group) for dataset in required_datasets: if dataset not in table: valid_table = False - if detailed_report: - report_lines.append("Missing dataset: %s" % dataset) if 'shape' in table.attrs: - if detailed_report: - report_lines.append("Validating 'shape' versus number of " - "samples and observations...") - n_obs, n_samp = table.attrs['shape'] obs_ids = table.get('observation/ids', None) samp_ids = table.get('sample/ids', None) @@ -270,14 +248,10 @@ def _validate_json(self, **kwargs): # Need to make this an attribute so that we have this info during # validation. self._format_version = kwargs['format_version'] - detailed_report = kwargs['detailed_report'] report_lines = [] valid_table = True - if detailed_report: - report_lines.append("Validating BIOM table...") - required_keys = [ ('format', self._valid_format), ('format_url', self._valid_format_url), @@ -299,9 +273,6 @@ def _validate_json(self, **kwargs): report_lines.append("Missing field: '%s'" % key) continue - if detailed_report: - report_lines.append("Validating '%s'..." 
% key) - status_msg = method(table_json) if len(status_msg) > 0: @@ -309,10 +280,6 @@ def _validate_json(self, **kwargs): report_lines.append(status_msg) if 'shape' in table_json: - if detailed_report: - report_lines.append("Validating 'shape' versus number of rows " - "and columns...") - if ('rows' in table_json and len(table_json['rows']) != table_json['shape'][0]): valid_table = False diff --git a/biom/parse.py b/biom/parse.py index 427d6adf..cf824afe 100644 --- a/biom/parse.py +++ b/biom/parse.py @@ -12,6 +12,8 @@ import numpy as np from future.utils import string_types +import io +import h5py from biom.exception import BiomParseException, UnknownAxisError from biom.table import Table @@ -658,9 +660,15 @@ def load_table(f): >>> table = load_table('path/to/table.biom') # doctest: +SKIP """ - with biom_open(f) as fp: + if isinstance(f, (io.IOBase, h5py.File)): try: - table = parse_biom_table(fp) + table = parse_biom_table(f) except (IndexError, TypeError): raise TypeError("%s does not appear to be a BIOM file!" % f) + else: + with biom_open(f) as fp: + try: + table = parse_biom_table(fp) + except (IndexError, TypeError): + raise TypeError("%s does not appear to be a BIOM file!" % f) return table diff --git a/biom/table.py b/biom/table.py index a19b6812..0411ea1a 100644 --- a/biom/table.py +++ b/biom/table.py @@ -3103,7 +3103,7 @@ def nonzero(self): for col_idx in indices[start:end]: yield (obs_id, samp_ids[col_idx]) - def nonzero_counts(self, axis, binary=False): + def nonzero_counts(self, axis, binary=True): """Get nonzero summaries about an axis Parameters @@ -3111,7 +3111,7 @@ def nonzero_counts(self, axis, binary=False): axis : {'sample', 'observation', 'whole'} The axis on which to count nonzero entries binary : bool, optional - Defaults to ``False``. If ``True``, return number of nonzero + Defaults to ``True``. If ``True``, return number of nonzero entries. If ``False``, sum the values of the entries. 
Returns diff --git a/biom/tests/test_cli/test_validate_table.py b/biom/tests/test_cli/test_validate_table.py index f4bf4662..cf7558f6 100644 --- a/biom/tests/test_cli/test_validate_table.py +++ b/biom/tests/test_cli/test_validate_table.py @@ -121,9 +121,8 @@ def test_valid(self): f.close() self.to_remove.append('valid_test3') - obs = self.cmd(table='valid_test3', detailed_report=True) + obs = self.cmd(table='valid_test3') self.assertTrue(obs['valid_table']) - self.assertTrue(len(obs['report_lines']) > 0) def test_invalid(self): """Correctly invalidates a table that is... invalid.""" diff --git a/biom/tests/test_parse.py b/biom/tests/test_parse.py index 0465130e..8bcaa1ba 100644 --- a/biom/tests/test_parse.py +++ b/biom/tests/test_parse.py @@ -16,7 +16,8 @@ import numpy as np import numpy.testing as npt -from biom.parse import generatedby, MetadataMap, parse_biom_table, parse_uc +from biom.parse import (generatedby, MetadataMap, parse_biom_table, parse_uc, + load_table) from biom.table import Table from biom.util import HAVE_H5PY, __version__ from biom.tests.long_lines import (uc_empty, uc_invalid_id, uc_minimal, @@ -237,6 +238,32 @@ def test_parse_biom_table_hdf5(self): Table.from_hdf5(h5py.File('test_data/test.biom')) os.chdir(cwd) + @npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed') + def test_load_table_filepath(self): + cwd = os.getcwd() + if '/' in __file__[1:]: + os.chdir(__file__.rsplit('/', 1)[0]) + load_table('test_data/test.biom') + os.chdir(cwd) + + @npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed') + def test_load_table_inmemory(self): + cwd = os.getcwd() + if '/' in __file__[1:]: + os.chdir(__file__.rsplit('/', 1)[0]) + load_table(h5py.File('test_data/test.biom')) + os.chdir(cwd) + + def test_load_table_inmemory_json(self): + cwd = os.getcwd() + if '/' in __file__[1:]: + os.chdir(__file__.rsplit('/', 1)[0]) + load_table(open('test_data/test.json')) + os.chdir(cwd) + + def test_load_table_inmemory_stringio(self): + 
load_table(StringIO('\n'.join(self.classic_otu_table1_no_tax))) + def test_parse_biom_table(self): """tests for parse_biom_table when we do not have h5py""" # This is a TSV as a list of lines diff --git a/biom/tests/test_table.py b/biom/tests/test_table.py index 2a88f62b..784de143 100644 --- a/biom/tests/test_table.py +++ b/biom/tests/test_table.py @@ -2235,9 +2235,9 @@ def test_nonzero_counts(self): exp_obs = np.array([14, 15, 0]) exp_whole = np.array([29]) - obs_samp = st.nonzero_counts('sample') - obs_obs = st.nonzero_counts('observation') - obs_whole = st.nonzero_counts('whole') + obs_samp = st.nonzero_counts('sample', binary=False) + obs_obs = st.nonzero_counts('observation', binary=False) + obs_whole = st.nonzero_counts('whole', binary=False) npt.assert_equal(obs_samp, exp_samp) npt.assert_equal(obs_obs, exp_obs)