Skip to content

Commit

Permalink
Merge pull request #650 from wasade/issue639
Browse files Browse the repository at this point in the history
Issue639
  • Loading branch information
jairideout committed Oct 19, 2015
2 parents 922068c + e5ab3cb commit ecbc2e4
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 1 deletion.
4 changes: 3 additions & 1 deletion ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@ Changes:

* Codebase updated to reflect pep8 1.6.x

Changes:
New features:

* `Table.to_hdf5` and `Table.from_hdf5` now support custom parsers and
formatters, see issue #608
* `Table.head` has been added to retrieve the first few rows and/or columns
from a table, see issue #639.

Bug fixes:

Expand Down
59 changes: 59 additions & 0 deletions biom/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,65 @@ def transpose(self):
self.ids()[:], self.ids(axis='observation')[:],
sample_md_copy, obs_md_copy, self.table_id)

def head(self, n=5, m=5):
    """Get the first n rows and m columns from self

    Parameters
    ----------
    n : int, optional
        The number of rows (observations) to get. This number must be
        greater than 0. If not specified, 5 rows will be retrieved.
    m : int, optional
        The number of columns (samples) to get. This number must be
        greater than 0. If not specified, 5 columns will be
        retrieved.

    Returns
    -------
    Table
        The subset table.

    Raises
    ------
    IndexError
        If `n` or `m` are <= 0.

    Notes
    -----
    Like `head` on Linux-like systems, requesting more rows (or columns)
    than exist will silently work.

    Examples
    --------
    >>> import numpy as np
    >>> from biom.table import Table
    >>> data = np.arange(100).reshape(5, 20)
    >>> obs_ids = ['O%d' % i for i in range(1, 6)]
    >>> samp_ids = ['S%d' % i for i in range(1, 21)]
    >>> table = Table(data, obs_ids, samp_ids)
    >>> print(table.head())  # doctest: +NORMALIZE_WHITESPACE
    # Constructed from biom file
    #OTU ID S1 S2 S3 S4 S5
    O1 0.0 1.0 2.0 3.0 4.0
    O2 20.0 21.0 22.0 23.0 24.0
    O3 40.0 41.0 42.0 43.0 44.0
    O4 60.0 61.0 62.0 63.0 64.0
    O5 80.0 81.0 82.0 83.0 84.0
    """
    # Validate before touching any table state so that bad arguments fail
    # fast; `n` is checked first, so head(0, 0) reports the problem with n.
    if n <= 0:
        raise IndexError("n cannot be <= 0.")

    if m <= 0:
        raise IndexError("m cannot be <= 0.")

    # Slicing past the end of the ID arrays simply yields every ID, which is
    # what makes oversized requests "silently work".
    row_ids = self.ids(axis='observation')[:n]
    col_ids = self.ids(axis='sample')[:m]

    # The observation filter is explicitly not in place, so `self` is left
    # untouched; the sample filter is then applied to that intermediate
    # table rather than to `self`.
    table = self.filter(row_ids, axis='observation', inplace=False)
    return table.filter(col_ids, axis='sample')

def group_metadata(self, axis='sample'):
"""Return the group metadata of the given axis
Expand Down
52 changes: 52 additions & 0 deletions tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import numpy as np
from scipy.sparse import lil_matrix, csr_matrix, csc_matrix

from biom import example_table
from biom.exception import UnknownAxisError, UnknownIDError, TableException
from biom.util import unzip, HAVE_H5PY, H5PY_VLEN_STR
from biom.table import (Table, prefer_self, index_list, list_nparray_to_sparse,
Expand All @@ -44,6 +45,57 @@


class SupportTests(TestCase):

def test_head(self):
    """Default head() of a table smaller than 5 x 5 is an equal copy."""
    # The example table is 2 x 3, which fits inside the default 5 x 5
    # window, so the data should be unchanged -- but head() must hand back
    # a distinct object, not the table it was called on.
    source = example_table
    result = source.head()
    self.assertIsNot(result, source)
    self.assertEqual(result, source)

def test_head_bounded(self):
    """head() truncates along whichever axis is given a small bound."""
    # Bound only the rows: first observation, every sample.
    obs = example_table.head(1)
    exp = Table(np.array([[0., 1., 2.]]), ['O1'], ['S1', 'S2', 'S3'],
                [{'taxonomy': ['Bacteria', 'Firmicutes']}],
                [{'environment': 'A'}, {'environment': 'B'},
                 {'environment': 'A'}])

    self.assertEqual(obs, exp)

    # Bound only the columns: every observation, first two samples.
    obs = example_table.head(m=2)
    exp = Table(np.array([[0., 1.], [3., 4.]]), ['O1', 'O2'], ['S1', 'S2'],
                [{'taxonomy': ['Bacteria', 'Firmicutes']},
                 {'taxonomy': ['Bacteria', 'Bacteroidetes']}],
                [{'environment': 'A'}, {'environment': 'B'}])
    self.assertEqual(obs, exp)

def test_head_overstep(self):
    """Asking for far more rows than exist is not an error."""
    # An oversized request silently works: the result equals the full
    # table while still being a separate object.
    source = example_table
    result = source.head(10000)
    self.assertIsNot(result, source)
    self.assertEqual(result, source)

def test_head_zero_or_neg(self):
    """Zero or negative n / m must raise IndexError."""
    # (positional args, keyword args) for each invalid call, in the order
    # they are exercised: bad n, bad m by keyword, then both positionally.
    invalid_calls = [
        ((0,), {}),
        ((-1,), {}),
        ((), {'m': 0}),
        ((), {'m': -1}),
        ((0, 5), {}),
        ((5, 0), {}),
    ]
    for positional, keyword in invalid_calls:
        with self.assertRaises(IndexError):
            example_table.head(*positional, **keyword)

def test_table_sparse_nparray(self):
"""beat the table sparsely to death"""
# nparray test
Expand Down

0 comments on commit ecbc2e4

Please sign in to comment.