Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue639 #650

Merged
merged 4 commits into from
Oct 19, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@ Changes:

* Codebase updated to reflect pep8 1.6.x

Changes:
New features:

* `Table.to_hdf5` and `Table.from_hdf5` now support custom parsers and
formatters, see issue #608
* `Table.head` has been added to retrieve the first few rows and/or columns
from a table, see issue #639.

Bug fixes:

Expand Down
59 changes: 59 additions & 0 deletions biom/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,65 @@ def transpose(self):
self.ids()[:], self.ids(axis='observation')[:],
sample_md_copy, obs_md_copy, self.table_id)

def head(self, n=5, m=5):
    """Get the first n rows and m columns from self

    Parameters
    ----------
    n : int, optional
        The number of rows (observations) to get. This number must be
        greater than 0. If not specified, 5 rows will be retrieved.

    m : int, optional
        The number of columns (samples) to get. This number must be
        greater than 0. If not specified, 5 columns will be
        retrieved.

    Returns
    -------
    Table
        The subset table.

    Raises
    ------
    IndexError
        If `n` or `m` are <= 0.

    Notes
    -----
    Like `head` on Unix-like systems, requesting more rows (or columns)
    than exist will silently work.

    Examples
    --------
    >>> import numpy as np
    >>> from biom.table import Table
    >>> data = np.arange(100).reshape(5, 20)
    >>> obs_ids = ['O%d' % i for i in range(1, 6)]
    >>> samp_ids = ['S%d' % i for i in range(1, 21)]
    >>> table = Table(data, obs_ids, samp_ids)
    >>> print(table.head())  # doctest: +NORMALIZE_WHITESPACE
    # Constructed from biom file
    #OTU ID S1  S2  S3  S4  S5
    O1  0.0 1.0 2.0 3.0 4.0
    O2  20.0    21.0    22.0    23.0    24.0
    O3  40.0    41.0    42.0    43.0    44.0
    O4  60.0    61.0    62.0    63.0    64.0
    O5  80.0    81.0    82.0    83.0    84.0

    """
    # Reject non-positive bounds before touching any table state.
    if n <= 0:
        raise IndexError("n cannot be <= 0.")

    if m <= 0:
        raise IndexError("m cannot be <= 0.")

    # Slicing past the end of the ID arrays simply yields every ID, which
    # is what makes an oversized request succeed silently.
    row_ids = self.ids(axis='observation')[:n]
    col_ids = self.ids(axis='sample')[:m]

    # The observation filter is explicitly not in place, so `self` is not
    # the object being narrowed. The sample filter is then applied to that
    # intermediate result (presumably in place, relying on the default of
    # `filter` -- confirm against its signature).
    table = self.filter(row_ids, axis='observation', inplace=False)
    return table.filter(col_ids, axis='sample')

def group_metadata(self, axis='sample'):
"""Return the group metadata of the given axis

Expand Down
52 changes: 52 additions & 0 deletions tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import numpy as np
from scipy.sparse import lil_matrix, csr_matrix, csc_matrix

from biom import example_table
from biom.exception import UnknownAxisError, UnknownIDError, TableException
from biom.util import unzip, HAVE_H5PY, H5PY_VLEN_STR
from biom.table import (Table, prefer_self, index_list, list_nparray_to_sparse,
Expand All @@ -44,6 +45,57 @@


class SupportTests(TestCase):

def test_head(self):
    """Default ``head()`` returns a distinct table equal to the source."""
    # The example table is only 2 x 3, so the default request does not
    # drop any of its data.
    result = example_table.head()
    self.assertIsNot(result, example_table)
    self.assertEqual(result, example_table)

def test_head_bounded(self):
    """``head`` truncates rows and columns to the requested counts."""
    # Only the first observation is kept; all three samples survive.
    obs = example_table.head(1)
    exp = Table(np.array([[0., 1., 2.]]), ['O1'], ['S1', 'S2', 'S3'],
                [{'taxonomy': ['Bacteria', 'Firmicutes']}],
                [{'environment': 'A'}, {'environment': 'B'},
                 {'environment': 'A'}])

    self.assertEqual(obs, exp)

    # Only the first two samples are kept; both observations survive.
    obs = example_table.head(m=2)
    exp = Table(np.array([[0., 1.], [3., 4.]]), ['O1', 'O2'], ['S1', 'S2'],
                [{'taxonomy': ['Bacteria', 'Firmicutes']},
                 {'taxonomy': ['Bacteria', 'Bacteroidetes']}],
                [{'environment': 'A'}, {'environment': 'B'}])
    self.assertEqual(obs, exp)

def test_head_overstep(self):
    """Requesting more rows and/or columns than exist silently works."""
    exp = example_table

    # more rows than the table has
    obs = example_table.head(10000)
    self.assertIsNot(obs, exp)
    self.assertEqual(obs, exp)

    # more columns than the table has
    obs = example_table.head(m=10000)
    self.assertIsNot(obs, exp)
    self.assertEqual(obs, exp)

    # both axes overstepped at once
    obs = example_table.head(10000, 10000)
    self.assertIsNot(obs, exp)
    self.assertEqual(obs, exp)

def test_head_zero_or_neg(self):
    """Zero or negative ``n`` / ``m`` must raise ``IndexError``."""
    # Each entry is (positional args, keyword args) for one invalid call,
    # listed in the order the calls are checked.
    invalid_calls = [
        ((0,), {}),
        ((-1,), {}),
        ((), {'m': 0}),
        ((), {'m': -1}),
        ((0, 5), {}),
        ((5, 0), {}),
    ]
    for args, kwargs in invalid_calls:
        with self.assertRaises(IndexError):
            example_table.head(*args, **kwargs)

def test_table_sparse_nparray(self):
"""beat the table sparsely to death"""
# nparray test
Expand Down