From edddd3606ac61d5e9a4e12b24ff5f9f27e02a3f5 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Mon, 19 Oct 2015 08:03:22 -0600 Subject: [PATCH 1/4] TST: tests for Table.head --- tests/test_table.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/test_table.py b/tests/test_table.py index adfe0ba9..be4699e8 100644 --- a/tests/test_table.py +++ b/tests/test_table.py @@ -19,6 +19,7 @@ import numpy as np from scipy.sparse import lil_matrix, csr_matrix, csc_matrix +from biom import example_table from biom.exception import UnknownAxisError, UnknownIDError, TableException from biom.util import unzip, HAVE_H5PY, H5PY_VLEN_STR from biom.table import (Table, prefer_self, index_list, list_nparray_to_sparse, @@ -44,6 +45,49 @@ class SupportTests(TestCase): + + def test_head(self): + # example table is 2 x 3, so no change in contained data + exp = example_table + obs = example_table.head() + self.assertIsNot(obs, exp) + self.assertEqual(obs, exp) + + def head_bounded(self): + obs = example_table.head(1) + exp = Table(np.array([[0., 1., 2.]]), ['O1'], ['S1', 'S2', 'S3']) + self.assertEqual(obs, exp) + + obs = example_table.head(m=2) + exp = Table(np.array([[0., 1.], [3., 4.]]), ['O1', 'O2'], ['S1', 'S2']) + self.assertEqual(obs, exp) + + def test_head_overstep(self): + # silently works + exp = example_table + obs = example_table.head(10000) + self.assertIsNot(obs, exp) + self.assertEqual(obs, exp) + + def test_head_zero_or_neg(self): + with self.assertRaises(IndexError): + example_table.head(0) + + with self.assertRaises(IndexError): + example_table.head(-1) + + with self.assertRaises(IndexError): + example_table.head(m=0) + + with self.assertRaises(IndexError): + example_table.head(m=-1) + + with self.assertRaises(IndexError): + example_table.head(0, 5) + + with self.assertRaises(IndexError): + example_table.head(5, 0) + def test_table_sparse_nparray(self): """beat the table sparsely to death""" # nparray test From 
b9cd3f83c2290c24b1549d8442c1829f0666bff5 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Mon, 19 Oct 2015 08:03:40 -0600 Subject: [PATCH 2/4] ENH/API: added Table.head --- biom/table.py | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/biom/table.py b/biom/table.py index 04b09460..7992c0ac 100644 --- a/biom/table.py +++ b/biom/table.py @@ -835,6 +835,65 @@ def transpose(self): self.ids()[:], self.ids(axis='observation')[:], sample_md_copy, obs_md_copy, self.table_id) + def head(self, n=5, m=5): + """Get the first n rows and m columns from self + + Parameters + ---------- + n : int, optional + The number of rows (observations) to get. This number must be + greater than 0. If not specified, 5 rows will be retrieved. + + m : int, optional + The number of columns (samples) to get. This number must be + greater than 0. If not specified, 5 columns will be + retrieved. + + Notes + ----- + Like `head` for Linux like systems, requesting more rows (or columns) + than exists will silently work. + + Raises + ------ + IndexError + If `n` or `m` are <= 0. + + Returns + ------- + Table + The subset table. 
+ + Examples + -------- + >>> import numpy as np + >>> from biom.table import Table + >>> data = np.arange(100).reshape(5, 20) + >>> obs_ids = ['O%d' % i for i in range(1, 6)] + >>> samp_ids = ['S%d' % i for i in range(1, 21)] + >>> table = Table(data, obs_ids, samp_ids) + >>> print table.head() # doctest: +NORMALIZE_WHITESPACE + # Constructed from biom file + #OTU ID S1 S2 S3 S4 S5 + O1 0.0 1.0 2.0 3.0 4.0 + O2 20.0 21.0 22.0 23.0 24.0 + O3 40.0 41.0 42.0 43.0 44.0 + O4 60.0 61.0 62.0 63.0 64.0 + O5 80.0 81.0 82.0 83.0 84.0 + + """ + if n <= 0: + raise IndexError("n cannot be <= 0.") + + if m <= 0: + raise IndexError("m cannot be <= 0.") + + row_ids = self.ids(axis='observation')[:n] + col_ids = self.ids(axis='sample')[:m] + + table = self.filter(row_ids, axis='observation', inplace=False) + return table.filter(col_ids, axis='sample') + def group_metadata(self, axis='sample'): """Return the group metadata of the given axis From 64a05e339066895e37227cab0ad1246cfe070a96 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Mon, 19 Oct 2015 08:07:17 -0600 Subject: [PATCH 3/4] DOC: changelog mention --- ChangeLog.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ChangeLog.md b/ChangeLog.md index eebf2fe3..c680353e 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -10,10 +10,12 @@ Changes: * Codebase updated to reflect pep8 1.6.x -Changes: +New features: * `Table.to_hdf5` and `Table.from_hdf5` now support custom parsers and formatters, see issue #608 +* `Table.head` has been added to retrieve the first few rows and/or columns + from a table, see issue #639. 
Bug fixes: From e5ab3cb73a2294a7ecc32e54ba3ce449a43194bb Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Mon, 19 Oct 2015 10:46:33 -0600 Subject: [PATCH 4/4] Addressing @jairideout's comments --- tests/test_table.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/test_table.py b/tests/test_table.py index be4699e8..6acc0200 100644 --- a/tests/test_table.py +++ b/tests/test_table.py @@ -53,13 +53,21 @@ def test_head(self): self.assertIsNot(obs, exp) self.assertEqual(obs, exp) - def head_bounded(self): + def test_head_bounded(self): obs = example_table.head(1) - exp = Table(np.array([[0., 1., 2.]]), ['O1'], ['S1', 'S2', 'S3']) + from collections import defaultdict + exp = Table(np.array([[0., 1., 2.]]), ['O1'], ['S1', 'S2', 'S3'], + [{'taxonomy': ['Bacteria', 'Firmicutes']}], + [{'environment': 'A'}, {'environment': 'B'}, + {'environment': 'A'}]) + self.assertEqual(obs, exp) obs = example_table.head(m=2) - exp = Table(np.array([[0., 1.], [3., 4.]]), ['O1', 'O2'], ['S1', 'S2']) + exp = Table(np.array([[0., 1.], [3., 4.]]), ['O1', 'O2'], ['S1', 'S2'], + [{'taxonomy': ['Bacteria', 'Firmicutes']}, + {'taxonomy': ['Bacteria', 'Bacteroidetes']}], + [{'environment': 'A'}, {'environment': 'B'}]) self.assertEqual(obs, exp) def test_head_overstep(self):