From ce93606563f699f24e2a94a50df54c43e693e562 Mon Sep 17 00:00:00 2001
From: Jamie Morton <jamietmorton@gmail.com>
Date: Thu, 28 Jul 2016 14:36:27 -0700
Subject: [PATCH 1/8] Adding in predict and residual querying

---
 gneiss/_summary.py           | 64 ++++++++++++++++++++++++++++++++++++
 gneiss/tests/test_summary.py | 40 ++++++++++++++++++++++
 2 files changed, 104 insertions(+)

diff --git a/gneiss/_summary.py b/gneiss/_summary.py
index 7ca84ba..b09730c 100644
--- a/gneiss/_summary.py
+++ b/gneiss/_summary.py
@@ -127,3 +127,67 @@ def coefficients(self, project=False):
             return c
         else:
             return coef
+
+    def residuals(self, project=False):
+        """ Returns calculated residuals.
+        Parameters
+        ----------
+        X : pd.DataFrame, optional
+            Input table of covariates.  If not specified, then the
+            fitted values calculated from training the model will be
+            returned.
+        project : bool, optional
+            Specifies if residuals should be projected back into
+            the Aitchison simplex.  If false, the residuals will be
+            represented as balances  (default: False).
+        Returns
+        -------
+        pd.DataFrame
+            A table of values where rows are coefficients, and the columns
+            are either balances or proportions, depending on the value of
+            `project`.
+        """
+        self._check_projection(project)
+
+        resid = pd.DataFrame()
+
+        for i in range(len(self.results)):
+            err = self.results[i].resid
+            err.name = self.results[i].model.endog_names
+            resid = resid.append(err)
+
+        if project:
+            # check=True due to type issue resolved here
+            # https://github.com/biocore/scikit-bio/pull/1396
+            proj_resid = ilr_inv(resid.values.T, basis=self.basis,
+                                 check=False).T
+            proj_resid = pd.DataFrame(proj_resid, index=self.feature_names,
+                                      columns=resid.columns)
+            return proj_resid
+        else:
+            return resid
+
+    def predict(self, X, project=False):
+        """ Performs a prediction based on model.
+
+        Parameters
+        ----------
+        X : pd.DataFrame, optional
+            Input table of covariates.  If not specified, then the
+            fitted values calculated from training the model will be
+            returned.
+        project : bool, optional
+            Specifies if coefficients should be projected back into
+            the Aitchison simplex.  If false, the coefficients will be
+            represented as balances  (default: False).
+
+        Returns
+        -------
+        pd.DataFrame
+            A table of values where rows are coefficients, and the columns
+            are either balances or proportions, depending on the value of
+            `project`.
+        """
+        self._check_projection(project)
+
+        pass
diff --git a/gneiss/tests/test_summary.py b/gneiss/tests/test_summary.py
index db56876..284fe3f 100644
--- a/gneiss/tests/test_summary.py
+++ b/gneiss/tests/test_summary.py
@@ -99,5 +99,45 @@ def test_regression_results_coefficient_projection(self):
                                check_exact=False,
                                check_less_precise=True)
 
+    def test_regression_results_residuals_projection(self):
+        # aliasing np.array for the sake of pep8
+        A = np.array
+        exp_resid = pd.DataFrame({'s1': ilr_inv(A([-0.986842, -0.236842])),
+                                  's2': ilr_inv(A([-0.065789, -1.815789])),
+                                  's3': ilr_inv(A([1.473684,  0.473684])),
+                                  's4': ilr_inv(A([1.394737, -1.105263])),
+                                  's5': ilr_inv(A([-1.065789,  1.184211])),
+                                  's6': ilr_inv(A([-1.144737, -0.394737])),
+                                  's7': ilr_inv(A([0.394737,  1.894737]))},
+                                 index=['Z1', 'Z2', 'Z3'])
+        feature_names = ['Z1', 'Z2', 'Z3']
+        basis = _gram_schmidt_basis(3)
+        res = RegressionResults(self.results, basis=basis,
+                                feature_names=feature_names)
+        pdt.assert_frame_equal(res.residuals(project=True), exp_resid,
+                               check_exact=False,
+                               check_less_precise=True)
+
+    def test_regression_results_residuals(self):
+        exp_resid = pd.DataFrame({'s1': [-0.986842, -0.236842],
+                                  's2': [-0.065789, -1.815789],
+                                  's3': [1.473684,  0.473684],
+                                  's4': [1.394737, -1.105263],
+                                  's5': [-1.065789,  1.184211],
+                                  's6': [-1.144737, -0.394737],
+                                  's7': [0.394737,  1.894737]},
+                                 index=['Y1', 'Y2'])
+        res = RegressionResults(self.results)
+        pdt.assert_frame_equal(res.residuals(), exp_resid,
+                               check_exact=False,
+                               check_less_precise=True)
+
+    def test_regression_results_predict(self):
+        pass
+
+    def test_regression_results_predict_projection(self):
+        pass
+
+
 if __name__ == "__main__":
     unittest.main()

From 8233fe5b31b9e9936ad23c432dcf80b5dca35399 Mon Sep 17 00:00:00 2001
From: Jamie Morton <jamietmorton@gmail.com>
Date: Sat, 30 Jul 2016 12:56:19 -0700
Subject: [PATCH 2/8] Adding prediction feature

---
 gneiss/_summary.py           | 67 ++++++++++++++++++++++------
 gneiss/tests/test_summary.py | 84 +++++++++++++++++++++++++++++++-----
 2 files changed, 129 insertions(+), 22 deletions(-)

diff --git a/gneiss/_summary.py b/gneiss/_summary.py
index b09730c..5ef83b2 100644
--- a/gneiss/_summary.py
+++ b/gneiss/_summary.py
@@ -15,10 +15,11 @@ class RegressionResults():
     """
     Summary object for storing regression results.
     """
-    def __init__(self, stat_results,
+    def __init__(self,
+                 stat_results,
                  feature_names=None,
                  basis=None):
-        """ Reorganizes statsmodels regression modules.
+        """ Reorganizes statsmodels regression results modules.
 
         Accepts a list of statsmodels RegressionResults objects
         and performs some addition summary statistics.
@@ -78,11 +79,11 @@ def _check_projection(self, project):
         """
         if self.basis is None and project:
             raise ValueError("Cannot perform projection into Aitchison simplex"
-                             "if `basis` is not specified.")
+                             " if `basis` is not specified.")
 
         if self.feature_names is None and project:
             raise ValueError("Cannot perform projection into Aitchison simplex"
-                             "if `feature_names` is not specified.")
+                             " if `feature_names` is not specified.")
 
     def coefficients(self, project=False):
         """ Returns coefficients from fit.
@@ -130,6 +131,7 @@ def coefficients(self, project=False):
 
     def residuals(self, project=False):
         """ Returns calculated residuals.
+
         Parameters
         ----------
         X : pd.DataFrame, optional
@@ -151,9 +153,9 @@ def residuals(self, project=False):
 
         resid = pd.DataFrame()
 
-        for i in range(len(self.results)):
-            err = self.results[i].resid
-            err.name = self.results[i].model.endog_names
+        for r in self.results:
+            err = r.resid
+            err.name = r.model.endog_names
             resid = resid.append(err)
 
         if project:
@@ -167,19 +169,21 @@ def residuals(self, project=False):
         else:
             return resid
 
-    def predict(self, X, project=False):
+    def predict(self, X=None, project=False, **kwargs):
         """ Performs a prediction based on model.
 
         Parameters
         ----------
         X : pd.DataFrame, optional
-            Input table of covariates.  If not specified, then the
-            fitted values calculated from training the model will be
-            returned.
+            Input table of covariates, where columns are covariates, and
+            rows are samples.  If not specified, then the fitted values
+            calculated from training the model will be returned.
         project : bool, optional
             Specifies if coefficients should be projected back into
             the Aitchison simplex.  If false, the coefficients will be
             represented as balances  (default: False).
+        **kwargs : dict
+            Other arguments to be passed into the model prediction.
 
         Returns
         -------
@@ -187,7 +191,46 @@ def predict(self, X, project=False):
             A table of values where rows are coefficients, and the columns
             are either balances or proportions, depending on the value of
             `project`.
+
+        Examples
+        --------
+        >>> import pandas as pd
+        >>> from gneiss._model import RegressionModel
+        >>> data = pd.DataFrame([[1, 1, 1],
+        ...                      [3, 2, 3],
+        ...                      [4, 3, 2],
+        ...                      [5, 4, 4],
+        ...                      [2, 5, 3],
+        ...                      [3, 6, 5],
+        ...                      [4, 7, 4]],
+        ...                     index=['s1', 's2', 's3', 's4',
+        ...                            's5', 's6', 's7'],
+        ...                     columns=['Y1', 'Y2', 'X'])
+        >>> model = RegressionResults([smf.ols(formula="Y1 ~ X", data=data).fit(),
+        ...                            smf.ols(formula="Y2 ~ X", data=data).fit()])
+        >>> model.predict(data['X'])
         """
         self._check_projection(project)
 
-        pass
+        prediction = pd.DataFrame()
+        for m in self.results:
+            # statsmodels predicts on the training data when X is None.
+            p = pd.Series(m.predict(X, **kwargs))
+            p.name = m.model.endog_names
+            if X is not None:
+                p.index = X.index
+            else:
+                p.index = m.fittedvalues.index
+            prediction = prediction.append(p)
+
+        if project:
+            # check=True due to type issue resolved here
+            # https://github.com/biocore/scikit-bio/pull/1396
+            proj_prediction = ilr_inv(prediction.values.T, basis=self.basis,
+                                      check=False)
+            proj_prediction = pd.DataFrame(proj_prediction,
+                                           columns=self.feature_names,
+                                           index=prediction.columns)
+            return proj_prediction
+
+        return prediction.T
diff --git a/gneiss/tests/test_summary.py b/gneiss/tests/test_summary.py
index 284fe3f..9d49f05 100644
--- a/gneiss/tests/test_summary.py
+++ b/gneiss/tests/test_summary.py
@@ -19,12 +19,16 @@
 class TestRegressionResults(unittest.TestCase):
 
     def setUp(self):
-        self.data = pd.DataFrame([[1, 3, 4, 5, 2, 3, 4],
-                                  list(range(1, 8)),
-                                  [1, 3, 2, 4, 3, 5, 4]],
-                                 columns=['s1', 's2', 's3', 's4',
+        self.data = pd.DataFrame([[1, 1, 1],
+                                  [3, 2, 3],
+                                  [4, 3, 2],
+                                  [5, 4, 4],
+                                  [2, 5, 3],
+                                  [3, 6, 5],
+                                  [4, 7, 4]],
+                                 index=['s1', 's2', 's3', 's4',
                                           's5', 's6', 's7'],
-                                 index=['Y1', 'Y2', 'X']).T
+                                 columns=['Y1', 'Y2', 'X'])
         model1 = smf.ols(formula="Y1 ~ X", data=self.data)
         model2 = smf.ols(formula="Y2 ~ X", data=self.data)
         self.results = [model1.fit(), model2.fit()]
@@ -99,9 +103,13 @@ def test_regression_results_coefficient_projection(self):
                                check_exact=False,
                                check_less_precise=True)
 
+    def test_regression_results_coefficient_project_error(self):
+        res = RegressionResults(self.results)
+        with self.assertRaises(ValueError):
+            res.coefficients(project=True)
+
     def test_regression_results_residuals_projection(self):
-        # aliasing np.array for the sake of pep8
-        A = np.array
+        A = np.array # aliasing np.array for the sake of pep8
         exp_resid = pd.DataFrame({'s1': ilr_inv(A([-0.986842, -0.236842])),
                                   's2': ilr_inv(A([-0.065789, -1.815789])),
                                   's3': ilr_inv(A([1.473684,  0.473684])),
@@ -133,11 +141,67 @@ def test_regression_results_residuals(self):
                                check_less_precise=True)
 
     def test_regression_results_predict(self):
-        pass
+        model = RegressionResults(self.results)
+        res_predict = model.predict(self.data[['X']])
 
-    def test_regression_results_predict_projection(self):
-        pass
+        exp_predict = pd.DataFrame({'s1': [1.986842, 1.236842],
+                                    's2': [3.065789, 3.815789],
+                                    's3': [2.526316, 2.526316],
+                                    's4': [3.605263, 5.105263],
+                                    's5': [3.065789, 3.815789],
+                                    's6': [4.144737, 6.394737],
+                                    's7': [3.605263, 5.105263]},
+                                   index=['Y1', 'Y2']).T
+
+        pdt.assert_frame_equal(res_predict, exp_predict)
+
+    def test_regression_results_predict_extrapolate(self):
+        model = RegressionResults(self.results)
+        extrapolate = pd.DataFrame({'X': [8, 9, 10]},
+                                   index = ['k1', 'k2', 'k3'])
 
+        res_predict = model.predict(extrapolate)
+
+        exp_predict = pd.DataFrame({'k1': [5.76315789, 10.26315789],
+                                    'k2': [6.30263158, 11.55263158],
+                                    'k3': [6.84210526, 12.84210526]},
+                                   index=['Y1', 'Y2']).T
+
+        pdt.assert_frame_equal(res_predict, exp_predict)
+
+    def test_regression_results_predict_projection(self):
+        feature_names = ['Z1', 'Z2', 'Z3']
+        basis = _gram_schmidt_basis(3)
+        model = RegressionResults(self.results, basis=basis,
+                                  feature_names=feature_names)
+
+        res_predict = model.predict(self.data[['X']], project=True)
+        A = np.array # aliasing np.array for the sake of pep8
+        exp_predict = pd.DataFrame({'s1': ilr_inv(A([1.986842, 1.236842])),
+                                    's2': ilr_inv(A([3.065789, 3.815789])),
+                                    's3': ilr_inv(A([2.526316, 2.526316])),
+                                    's4': ilr_inv(A([3.605263, 5.105263])),
+                                    's5': ilr_inv(A([3.065789, 3.815789])),
+                                    's6': ilr_inv(A([4.144737, 6.394737])),
+                                    's7': ilr_inv(A([3.605263, 5.105263]))},
+                                   index=feature_names).T
+
+        pdt.assert_frame_equal(res_predict, exp_predict)
+
+    def test_regression_results_predict_none(self):
+        model = RegressionResults(self.results)
+        res_predict = model.predict()
+
+        exp_predict = pd.DataFrame({'s1': [1.986842, 1.236842],
+                                    's2': [3.065789, 3.815789],
+                                    's3': [2.526316, 2.526316],
+                                    's4': [3.605263, 5.105263],
+                                    's5': [3.065789, 3.815789],
+                                    's6': [4.144737, 6.394737],
+                                    's7': [3.605263, 5.105263]},
+                                   index=['Y1', 'Y2']).T
+
+        pdt.assert_frame_equal(res_predict, exp_predict)
 
 if __name__ == "__main__":
     unittest.main()

From 51aef4cf334d9e683f13530fce760fcc304ed272 Mon Sep 17 00:00:00 2001
From: Jamie Morton <jamietmorton@gmail.com>
Date: Sat, 30 Jul 2016 12:59:46 -0700
Subject: [PATCH 3/8] pep8

---
 gneiss/_summary.py           | 18 ------------------
 gneiss/tests/test_summary.py | 21 ++++++++++-----------
 ipynb/balance_trees.ipynb    |  2 +-
 3 files changed, 11 insertions(+), 30 deletions(-)

diff --git a/gneiss/_summary.py b/gneiss/_summary.py
index 5ef83b2..89bfd3d 100644
--- a/gneiss/_summary.py
+++ b/gneiss/_summary.py
@@ -191,24 +191,6 @@ def predict(self, X=None, project=False, **kwargs):
             A table of values where rows are coefficients, and the columns
             are either balances or proportions, depending on the value of
             `project`.
-
-        Examples
-        --------
-        >>> import pandas as pd
-        >>> from gneiss._model import RegressionModel
-        >>> data = pd.DataFrame([[1, 1, 1],
-        ...                      [3, 2, 3],
-        ...                      [4, 3, 2],
-        ...                      [5, 4, 4],
-        ...                      [2, 5, 3],
-        ...                      [3, 6, 5],
-        ...                      [4, 7, 4]],
-        ...                     index=['s1', 's2', 's3', 's4',
-        ...                            's5', 's6', 's7'],
-        ...                     columns=['Y1', 'Y2', 'X'])
-        >>> model = RegressionResults([smf.ols(formula="Y1 ~ X", data=data).fit(),
-        ...                            smf.ols(formula="Y2 ~ X", data=data).fit()])
-        >>> model.predict(data['X'])
         """
         self._check_projection(project)
 
diff --git a/gneiss/tests/test_summary.py b/gneiss/tests/test_summary.py
index 9d49f05..ef9e15d 100644
--- a/gneiss/tests/test_summary.py
+++ b/gneiss/tests/test_summary.py
@@ -27,7 +27,7 @@ def setUp(self):
                                   [3, 6, 5],
                                   [4, 7, 4]],
                                  index=['s1', 's2', 's3', 's4',
-                                          's5', 's6', 's7'],
+                                        's5', 's6', 's7'],
                                  columns=['Y1', 'Y2', 'X'])
         model1 = smf.ols(formula="Y1 ~ X", data=self.data)
         model2 = smf.ols(formula="Y2 ~ X", data=self.data)
@@ -109,14 +109,14 @@ def test_regression_results_coefficient_project_error(self):
             res.coefficients(project=True)
 
     def test_regression_results_residuals_projection(self):
-        A = np.array # aliasing np.array for the sake of pep8
+        A = np.array  # aliasing np.array for the sake of pep8
         exp_resid = pd.DataFrame({'s1': ilr_inv(A([-0.986842, -0.236842])),
                                   's2': ilr_inv(A([-0.065789, -1.815789])),
-                                  's3': ilr_inv(A([1.473684,  0.473684])),
+                                  's3': ilr_inv(A([1.473684, 0.473684])),
                                   's4': ilr_inv(A([1.394737, -1.105263])),
-                                  's5': ilr_inv(A([-1.065789,  1.184211])),
+                                  's5': ilr_inv(A([-1.065789, 1.184211])),
                                   's6': ilr_inv(A([-1.144737, -0.394737])),
-                                  's7': ilr_inv(A([0.394737,  1.894737]))},
+                                  's7': ilr_inv(A([0.394737, 1.894737]))},
                                  index=['Z1', 'Z2', 'Z3'])
         feature_names = ['Z1', 'Z2', 'Z3']
         basis = _gram_schmidt_basis(3)
@@ -129,11 +129,11 @@ def test_regression_results_residuals_projection(self):
     def test_regression_results_residuals(self):
         exp_resid = pd.DataFrame({'s1': [-0.986842, -0.236842],
                                   's2': [-0.065789, -1.815789],
-                                  's3': [1.473684,  0.473684],
+                                  's3': [1.473684, 0.473684],
                                   's4': [1.394737, -1.105263],
-                                  's5': [-1.065789,  1.184211],
+                                  's5': [-1.065789, 1.184211],
                                   's6': [-1.144737, -0.394737],
-                                  's7': [0.394737,  1.894737]},
+                                  's7': [0.394737, 1.894737]},
                                  index=['Y1', 'Y2'])
         res = RegressionResults(self.results)
         pdt.assert_frame_equal(res.residuals(), exp_resid,
@@ -158,8 +158,7 @@ def test_regression_results_predict(self):
     def test_regression_results_predict_extrapolate(self):
         model = RegressionResults(self.results)
         extrapolate = pd.DataFrame({'X': [8, 9, 10]},
-                                   index = ['k1', 'k2', 'k3'])
-
+                                   index=['k1', 'k2', 'k3'])
         res_predict = model.predict(extrapolate)
 
         exp_predict = pd.DataFrame({'k1': [5.76315789, 10.26315789],
@@ -176,7 +175,7 @@ def test_regression_results_predict_projection(self):
                                   feature_names=feature_names)
 
         res_predict = model.predict(self.data[['X']], project=True)
-        A = np.array # aliasing np.array for the sake of pep8
+        A = np.array  # aliasing np.array for the sake of pep8
         exp_predict = pd.DataFrame({'s1': ilr_inv(A([1.986842, 1.236842])),
                                     's2': ilr_inv(A([3.065789, 3.815789])),
                                     's3': ilr_inv(A([2.526316, 2.526316])),
diff --git a/ipynb/balance_trees.ipynb b/ipynb/balance_trees.ipynb
index 13d41be..16fcd27 100644
--- a/ipynb/balance_trees.ipynb
+++ b/ipynb/balance_trees.ipynb
@@ -459,7 +459,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.1"
+   "version": "3.5.2"
   }
  },
  "nbformat": 4,

From 6cad36bbaf7e6f985368298a81fe909389f3f9ed Mon Sep 17 00:00:00 2001
From: Jamie Morton <jamietmorton@gmail.com>
Date: Sat, 30 Jul 2016 18:04:29 -0700
Subject: [PATCH 4/8] Fixing orientation of residuals

---
 gneiss/_summary.py           | 9 ++++-----
 gneiss/tests/test_summary.py | 4 ++--
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/gneiss/_summary.py b/gneiss/_summary.py
index 89bfd3d..768040e 100644
--- a/gneiss/_summary.py
+++ b/gneiss/_summary.py
@@ -145,7 +145,7 @@ def residuals(self, project=False):
         Returns
         -------
         pd.DataFrame
-            A table of values where rows are coefficients, and the columns
+            A table of values where rows are samples, and the columns
             are either balances or proportions, depending on the value of
             `project`.
         """
@@ -163,11 +163,10 @@ def residuals(self, project=False):
             # https://github.com/biocore/scikit-bio/pull/1396
             proj_resid = ilr_inv(resid.values.T, basis=self.basis,
                                  check=False).T
-            proj_resid = pd.DataFrame(proj_resid, index=self.feature_names,
-                                      columns=resid.columns)
-            return proj_resid
+            return  pd.DataFrame(proj_resid, index=self.feature_names,
+                                 columns=resid.columns).T
         else:
-            return resid
+            return resid.T
 
     def predict(self, X=None, project=False, **kwargs):
         """ Performs a prediction based on model.
diff --git a/gneiss/tests/test_summary.py b/gneiss/tests/test_summary.py
index ef9e15d..1b11c37 100644
--- a/gneiss/tests/test_summary.py
+++ b/gneiss/tests/test_summary.py
@@ -117,7 +117,7 @@ def test_regression_results_residuals_projection(self):
                                   's5': ilr_inv(A([-1.065789, 1.184211])),
                                   's6': ilr_inv(A([-1.144737, -0.394737])),
                                   's7': ilr_inv(A([0.394737, 1.894737]))},
-                                 index=['Z1', 'Z2', 'Z3'])
+                                 index=['Z1', 'Z2', 'Z3']).T
         feature_names = ['Z1', 'Z2', 'Z3']
         basis = _gram_schmidt_basis(3)
         res = RegressionResults(self.results, basis=basis,
@@ -134,7 +134,7 @@ def test_regression_results_residuals(self):
                                   's5': [-1.065789, 1.184211],
                                   's6': [-1.144737, -0.394737],
                                   's7': [0.394737, 1.894737]},
-                                 index=['Y1', 'Y2'])
+                                 index=['Y1', 'Y2']).T
         res = RegressionResults(self.results)
         pdt.assert_frame_equal(res.residuals(), exp_resid,
                                check_exact=False,

From 501265e145647f92d646183da81aedfbb58fd104 Mon Sep 17 00:00:00 2001
From: Jamie Morton <jamietmorton@gmail.com>
Date: Sat, 30 Jul 2016 18:05:32 -0700
Subject: [PATCH 5/8] pep8

---
 gneiss/_summary.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/gneiss/_summary.py b/gneiss/_summary.py
index 768040e..baff002 100644
--- a/gneiss/_summary.py
+++ b/gneiss/_summary.py
@@ -163,8 +163,8 @@ def residuals(self, project=False):
             # https://github.com/biocore/scikit-bio/pull/1396
             proj_resid = ilr_inv(resid.values.T, basis=self.basis,
                                  check=False).T
-            return  pd.DataFrame(proj_resid, index=self.feature_names,
-                                 columns=resid.columns).T
+            return pd.DataFrame(proj_resid, index=self.feature_names,
+                                columns=resid.columns).T
         else:
             return resid.T
 
@@ -209,9 +209,7 @@ def predict(self, X=None, project=False, **kwargs):
             # https://github.com/biocore/scikit-bio/pull/1396
             proj_prediction = ilr_inv(prediction.values.T, basis=self.basis,
                                       check=False)
-            proj_prediction = pd.DataFrame(proj_prediction,
-                                           columns=self.feature_names,
-                                           index=prediction.columns)
-            return proj_prediction
-
+            return pd.DataFrame(proj_prediction,
+                                columns=self.feature_names,
+                                index=prediction.columns)
         return prediction.T

From 92bba97a9174f7225aba7bea13326e923bd907ba Mon Sep 17 00:00:00 2001
From: Jamie Morton <jamietmorton@gmail.com>
Date: Tue, 2 Aug 2016 13:30:51 -0400
Subject: [PATCH 6/8] Adding comments for further clarification on project

---
 gneiss/_summary.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/gneiss/_summary.py b/gneiss/_summary.py
index db33cde..b056a22 100644
--- a/gneiss/_summary.py
+++ b/gneiss/_summary.py
@@ -160,8 +160,10 @@ def residuals(self, project=False):
             resid = resid.append(err)
 
         if project:
-            # check=True due to type issue resolved here
-            # https://github.com/biocore/scikit-bio/pull/1396
+            # `check=False`, due to a problem with error handling
+            # addressed here https://github.com/biocore/scikit-bio/pull/1396
+            # This will need to be fixed here:
+            # https://github.com/biocore/gneiss/issues/34
             proj_resid = ilr_inv(resid.values.T, basis=self.basis,
                                  check=False).T
             return pd.DataFrame(proj_resid, index=self.feature_names,
@@ -206,8 +208,10 @@ def predict(self, X=None, project=False, **kwargs):
             prediction = prediction.append(p)
 
         if project:
-            # check=True due to type issue resolved here
-            # https://github.com/biocore/scikit-bio/pull/1396
+            # `check=False`, due to a problem with error handling
+            # addressed here https://github.com/biocore/scikit-bio/pull/1396
+            # This will need to be fixed here:
+            # https://github.com/biocore/gneiss/issues/34
             proj_prediction = ilr_inv(prediction.values.T, basis=self.basis,
                                       check=False)
             return pd.DataFrame(proj_prediction,

From b0dfdff44a890e78edb296cf637b3994ce93d08f Mon Sep 17 00:00:00 2001
From: Jamie Morton <jamietmorton@gmail.com>
Date: Tue, 2 Aug 2016 13:36:40 -0400
Subject: [PATCH 7/8] TST: Adding in tests for residual and predict

---
 gneiss/tests/test_summary.py | 95 ++++++++++++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)

diff --git a/gneiss/tests/test_summary.py b/gneiss/tests/test_summary.py
index fc246d2..84f6523 100644
--- a/gneiss/tests/test_summary.py
+++ b/gneiss/tests/test_summary.py
@@ -108,5 +108,100 @@ def test_regression_results_coefficient_project_error(self):
         with self.assertRaises(ValueError):
             res.coefficients(project=True)
 
+    def test_regression_results_residuals_projection(self):
+        A = np.array  # aliasing np.array for the sake of pep8
+        exp_resid = pd.DataFrame({'s1': ilr_inv(A([-0.986842, -0.236842])),
+                                  's2': ilr_inv(A([-0.065789, -1.815789])),
+                                  's3': ilr_inv(A([1.473684, 0.473684])),
+                                  's4': ilr_inv(A([1.394737, -1.105263])),
+                                  's5': ilr_inv(A([-1.065789, 1.184211])),
+                                  's6': ilr_inv(A([-1.144737, -0.394737])),
+                                  's7': ilr_inv(A([0.394737, 1.894737]))},
+                                 index=['Z1', 'Z2', 'Z3']).T
+        feature_names = ['Z1', 'Z2', 'Z3']
+        basis = _gram_schmidt_basis(3)
+        res = RegressionResults(self.results, basis=basis,
+                                feature_names=feature_names)
+        pdt.assert_frame_equal(res.residuals(project=True), exp_resid,
+                               check_exact=False,
+                               check_less_precise=True)
+
+    def test_regression_results_residuals(self):
+        exp_resid = pd.DataFrame({'s1': [-0.986842, -0.236842],
+                                  's2': [-0.065789, -1.815789],
+                                  's3': [1.473684, 0.473684],
+                                  's4': [1.394737, -1.105263],
+                                  's5': [-1.065789, 1.184211],
+                                  's6': [-1.144737, -0.394737],
+                                  's7': [0.394737, 1.894737]},
+                                 index=['Y1', 'Y2']).T
+        res = RegressionResults(self.results)
+        pdt.assert_frame_equal(res.residuals(), exp_resid,
+                               check_exact=False,
+                               check_less_precise=True)
+
+    def test_regression_results_predict(self):
+        model = RegressionResults(self.results)
+        res_predict = model.predict(self.data[['X']])
+
+        exp_predict = pd.DataFrame({'s1': [1.986842, 1.236842],
+                                    's2': [3.065789, 3.815789],
+                                    's3': [2.526316, 2.526316],
+                                    's4': [3.605263, 5.105263],
+                                    's5': [3.065789, 3.815789],
+                                    's6': [4.144737, 6.394737],
+                                    's7': [3.605263, 5.105263]},
+                                   index=['Y1', 'Y2']).T
+
+        pdt.assert_frame_equal(res_predict, exp_predict)
+
+    def test_regression_results_predict_extrapolate(self):
+        model = RegressionResults(self.results)
+        extrapolate = pd.DataFrame({'X': [8, 9, 10]},
+                                   index=['k1', 'k2', 'k3'])
+        res_predict = model.predict(extrapolate)
+
+        exp_predict = pd.DataFrame({'k1': [5.76315789, 10.26315789],
+                                    'k2': [6.30263158, 11.55263158],
+                                    'k3': [6.84210526, 12.84210526]},
+                                   index=['Y1', 'Y2']).T
+
+        pdt.assert_frame_equal(res_predict, exp_predict)
+
+    def test_regression_results_predict_projection(self):
+        feature_names = ['Z1', 'Z2', 'Z3']
+        basis = _gram_schmidt_basis(3)
+        model = RegressionResults(self.results, basis=basis,
+                                  feature_names=feature_names)
+
+        res_predict = model.predict(self.data[['X']], project=True)
+        A = np.array  # aliasing np.array for the sake of pep8
+        exp_predict = pd.DataFrame({'s1': ilr_inv(A([1.986842, 1.236842])),
+                                    's2': ilr_inv(A([3.065789, 3.815789])),
+                                    's3': ilr_inv(A([2.526316, 2.526316])),
+                                    's4': ilr_inv(A([3.605263, 5.105263])),
+                                    's5': ilr_inv(A([3.065789, 3.815789])),
+                                    's6': ilr_inv(A([4.144737, 6.394737])),
+                                    's7': ilr_inv(A([3.605263, 5.105263]))},
+                                   index=feature_names).T
+
+        pdt.assert_frame_equal(res_predict, exp_predict)
+
+    def test_regression_results_predict_none(self):
+        model = RegressionResults(self.results)
+        res_predict = model.predict()
+
+        exp_predict = pd.DataFrame({'s1': [1.986842, 1.236842],
+                                    's2': [3.065789, 3.815789],
+                                    's3': [2.526316, 2.526316],
+                                    's4': [3.605263, 5.105263],
+                                    's5': [3.065789, 3.815789],
+                                    's6': [4.144737, 6.394737],
+                                    's7': [3.605263, 5.105263]},
+                                   index=['Y1', 'Y2']).T
+
+        pdt.assert_frame_equal(res_predict, exp_predict)
+
+
 if __name__ == "__main__":
     unittest.main()

From 855078e0397361fc959a7995dc3832922662b6c1 Mon Sep 17 00:00:00 2001
From: Jamie Morton <jamietmorton@gmail.com>
Date: Tue, 9 Aug 2016 11:21:35 -0400
Subject: [PATCH 8/8] Addressing @josenava's comments

---
 gneiss/_summary.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/gneiss/_summary.py b/gneiss/_summary.py
index b056a22..2bbf326 100644
--- a/gneiss/_summary.py
+++ b/gneiss/_summary.py
@@ -92,7 +92,7 @@ def coefficients(self, project=False):
         ----------
         project : bool, optional
             Specifies if coefficients should be projected back into
-            the Aitchison simplex.  If false, the coefficients will be
+            the Aitchison simplex [1]_.  If false, the coefficients will be
             represented as balances  (default: False).
 
         Returns
@@ -110,6 +110,11 @@ def coefficients(self, project=False):
         ValueError:
             Cannot perform projection into Aitchison simplex
             if `feature_names` is not specified.
+
+        References
+        ----------
+        .. [1] Aitchison, J. "A concise guide to compositional data analysis,
+           CDA work." Girona 24 (2003): 73-81.
         """
         self._check_projection(project)
         coef = pd.DataFrame()
@@ -141,7 +146,7 @@ def residuals(self, project=False):
             returned.
         project : bool, optional
             Specifies if coefficients should be projected back into
-            the Aitchison simplex.  If false, the coefficients will be
+            the Aitchison simplex [1]_.  If false, the residuals will be
             represented as balances  (default: False).
         Returns
         -------
@@ -149,6 +154,11 @@ def residuals(self, project=False):
             A table of values where rows are samples, and the columns
             are either balances or proportions, depending on the value of
             `project`.
+
+        References
+        ----------
+        .. [1] Aitchison, J. "A concise guide to compositional data analysis,
+           CDA work." Girona 24 (2003): 73-81.
         """
         self._check_projection(project)
 
@@ -182,7 +192,7 @@ def predict(self, X=None, project=False, **kwargs):
             calculated from training the model will be returned.
         project : bool, optional
             Specifies if coefficients should be projected back into
-            the Aitchison simplex.  If false, the coefficients will be
+            the Aitchison simplex [1]_.  If false, the predictions will be
             represented as balances  (default: False).
         **kwargs : dict
             Other arguments to be passed into the model prediction.
@@ -193,6 +203,11 @@ def predict(self, X=None, project=False, **kwargs):
             A table of values where rows are coefficients, and the columns
             are either balances or proportions, depending on the value of
             `project`.
+
+        References
+        ----------
+        .. [1] Aitchison, J. "A concise guide to compositional data analysis,
+           CDA work." Girona 24 (2003): 73-81.
         """
         self._check_projection(project)