From fe40ba53d30439ff4bc3194aea3695dc95723524 Mon Sep 17 00:00:00 2001
From: Christian Diener <ch.diener@gmail.com>
Date: Thu, 7 Mar 2019 11:51:59 -0800
Subject: [PATCH 1/4] specify fill value

---
 biom/table.py            | 9 +++++----
 biom/tests/test_table.py | 9 ++++++++-
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/biom/table.py b/biom/table.py
index 30a86035..f243cd6f 100644
--- a/biom/table.py
+++ b/biom/table.py
@@ -178,7 +178,7 @@
 from copy import deepcopy
 from datetime import datetime
 from json import dumps
-from functools import reduce
+from functools import reduce, partial
 from operator import itemgetter
 from future.builtins import zip
 from future.utils import viewitems
@@ -4045,9 +4045,10 @@ def to_dataframe(self, dense=False):
             mat = self.matrix_data.toarray()
             constructor = pd.DataFrame
         else:
-            mat = [pd.SparseSeries(r.toarray().squeeze())
-                   for r in self.matrix_data.tocsr()]
-            constructor = pd.SparseDataFrame
+            mat = self.matrix_data
+            constructor = partial(pd.SparseDataFrame,
+                                  default_fill_value=0,
+                                  copy=True)
 
         return constructor(mat, index=index, columns=columns)
 
diff --git a/biom/tests/test_table.py b/biom/tests/test_table.py
index a1245892..0503551b 100644
--- a/biom/tests/test_table.py
+++ b/biom/tests/test_table.py
@@ -1475,10 +1475,17 @@ def test_add_group_metadata_w_existing_metadata(self):
     def test_to_dataframe(self):
         exp = pd.SparseDataFrame(np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]),
                                  index=['O1', 'O2'],
-                                 columns=['S1', 'S2', 'S3'])
+                                 columns=['S1', 'S2', 'S3'],
+                                 default_fill_value = 0.0)
         obs = example_table.to_dataframe()
         pdt.assert_frame_equal(obs, exp)
 
+    def test_to_dataframe_is_sparse(self):
+        df = example_table.to_dataframe()
+        density = (example_table.matrix_data.getnnz() /
+                   np.prod(example_table.shape))
+        assert np.allclose(df.density, density)
+
     def test_to_dataframe_dense(self):
         exp = pd.DataFrame(np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]),
                            index=['O1', 'O2'],

From bbf66b34c452810bac64a2e2d8f9a589a143aa8e Mon Sep 17 00:00:00 2001
From: Christian Diener <ch.diener@gmail.com>
Date: Thu, 7 Mar 2019 13:10:33 -0800
Subject: [PATCH 2/4] fix flake8

---
 biom/tests/test_table.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/biom/tests/test_table.py b/biom/tests/test_table.py
index 0503551b..5a881bdc 100644
--- a/biom/tests/test_table.py
+++ b/biom/tests/test_table.py
@@ -1476,7 +1476,7 @@ def test_to_dataframe(self):
         exp = pd.SparseDataFrame(np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]),
                                  index=['O1', 'O2'],
                                  columns=['S1', 'S2', 'S3'],
-                                 default_fill_value = 0.0)
+                                 default_fill_value=0.0)
         obs = example_table.to_dataframe()
         pdt.assert_frame_equal(obs, exp)
 

From 6c11d54e87473700b952123e5d4975aea2ae2407 Mon Sep 17 00:00:00 2001
From: Christian Diener <ch.diener@gmail.com>
Date: Thu, 7 Mar 2019 13:16:57 -0800
Subject: [PATCH 3/4] add to changelog

---
 ChangeLog.md | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/ChangeLog.md b/ChangeLog.md
index cb27d306..4bfeb922 100644
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -10,21 +10,23 @@ New Features:
 
 Bug fixes:
 
+* `Table.to_dataframe(dense=False)` now correctly produces sparse data frames (previously it accidentally produced dense ones).
+
 biom 2.1.7
 ----------
 
 New features and bug fixes, released on 28 September 2018.
 
-Important: 
+Important:
 
 * Python 3.4 support has been dropped. We now only support Python 2.7, 3.5, 3.6 and 3.7.
 * We will be dropping Python 2.7 support on the next release.
-* Pandas >= 0.20.0 is now the minimum required version. 
+* Pandas >= 0.20.0 is now the minimum required version.
 * pytest is now used instead of nose.
 
 New Features:
 
-* Massive performance boost to `Table.collapse` with the default collapse function. The difference was 10s of milliseconds vs. minutes stemming from prior use of `operator.add`. See [issue #761](https://github.com/biocore/biom-format/issues/761). 
+* Massive performance boost to `Table.collapse` with the default collapse function. The difference was 10s of milliseconds vs. minutes stemming from prior use of `operator.add`. See [issue #761](https://github.com/biocore/biom-format/issues/761).
 * `Table.align_to` for aligning one table to another. This is useful in multi-omic analyses where multiple preparations have been performed on the sample physical samples. This is essentially a helper method around `Table.sort_order`. See [issue #747](https://github.com/biocore/biom-format/issues/747).
 * Added additional sanity checks when calling `Table.to_hdf5`, see [PR #769](https://github.com/biocore/biom-format/pull/769).
 * `Table.subsample()` can optionally perform subsampling with replacement. See [issue #774](https://github.com/biocore/biom-format/issues/774).
@@ -47,7 +49,7 @@ New Features:
 * `Table.from_hdf5` now supports a rapid subset in the event that metadata is
    not needed. In benchmarking against the Earth Microbiome Project BIOM table,
    the reduction in runtime was multiple orders of magnitude while additionally
-   preserving substantial memory. 
+   preserving substantial memory.
 * `Table.rankdata` has been added to convert values to ranked abundances on
   either axis. See [issue #645](https://github.com/biocore/biom-format/issues/639).
 * Format of numbers in ``biom summarize-table`` output is now more readable and localized. See [issue #679](https://github.com/biocore/biom-format/issues/679).
@@ -105,8 +107,8 @@ Bug fixes:
 * `biom --version` now prints the software version (previously the individual
   commands did this, but not the base command).
 * `Table.vlen_list_of_str_formatter` was considering a `str` to be valid for
-  formatting resulting in an obscure error when a `str`, as opposed to a 
-  `list` of `str`, was used for taxonomy. See 
+  formatting resulting in an obscure error when a `str`, as opposed to a
+  `list` of `str`, was used for taxonomy. See
   [issue #709](https://github.com/biocore/biom-format/issues/709).
 
 biom 2.1.4

From f9676d8ea4b98873ec242db661d28a06a028af03 Mon Sep 17 00:00:00 2001
From: Christian Diener <ch.diener@gmail.com>
Date: Thu, 7 Mar 2019 13:19:37 -0800
Subject: [PATCH 4/4] fix test for Python 2

---
 biom/tests/test_table.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/biom/tests/test_table.py b/biom/tests/test_table.py
index 5a881bdc..2a88f62b 100644
--- a/biom/tests/test_table.py
+++ b/biom/tests/test_table.py
@@ -1482,7 +1482,7 @@ def test_to_dataframe(self):
 
     def test_to_dataframe_is_sparse(self):
         df = example_table.to_dataframe()
-        density = (example_table.matrix_data.getnnz() /
+        density = (float(example_table.matrix_data.getnnz()) /
                    np.prod(example_table.shape))
         assert np.allclose(df.density, density)