From 54e8aa0243af9b62153a04d68e9061e4e635236e Mon Sep 17 00:00:00 2001 From: Jamie Morton Date: Thu, 28 Jul 2016 14:09:42 -0700 Subject: [PATCH 1/9] Adding in coefficient querying --- gneiss/_summary.py | 58 ++++++++++++++++++++++++++++++++++++ gneiss/tests/test_summary.py | 41 +++++++++++++++++++++++++ 2 files changed, 99 insertions(+) diff --git a/gneiss/_summary.py b/gneiss/_summary.py index ed70f4d..583c576 100644 --- a/gneiss/_summary.py +++ b/gneiss/_summary.py @@ -57,3 +57,61 @@ def __init__(self, stat_results, # calculate the overall coefficient of determination (i.e. R2) sst = sse + ssr self.r2 = 1 - sse / sst + + def _check_projection(self, project): + """ + Parameters + ---------- + project : bool + Specifies if a projection into the Aitchison simplex can be performed. + Raises + ------ + ValueError: + Cannot perform projection into Aitchison simplex if `basis` + is not specified. + ValueError: + Cannot perform projection into Aitchison simplex + if `feature_names` is not specified. + """ + if self.basis is None and project: + raise ValueError("Cannot perform projection into Aitchison simplex" + "if `basis` is not specified.") + + if self.feature_names is None and project: + raise ValueError("Cannot perform projection into Aitchison simplex" + "if `feature_names` is not specified.") + + def coefficients(self, project=False): + """ Returns coefficients from fit. + + Parameters + ---------- + project : bool, optional + Specifies if coefficients should be projected back into + the Aitchison simplex. If false, the coefficients will be + represented as balances (default: False). + + Returns + ------- + pd.DataFrame + A table of values where columns are coefficients, and the index + is either balances or proportions, depending on the value of + `project`. + """ + self._check_projection(project) + coef = pd.DataFrame() + + for i in range(len(self.results)): + c = self.results[i].params + c.name = self.results[i].model.endog_names + coef = coef.append(c) + + if project: + # `check=True` due to type issue resolved here + # https://github.com/biocore/scikit-bio/pull/1396 + c = ilr_inv(coef.values.T, basis=self.basis, check=False).T + c = pd.DataFrame(c, index=self.feature_names, + columns=coef.columns) + return c + else: + return coef diff --git a/gneiss/tests/test_summary.py b/gneiss/tests/test_summary.py index dbd2cdf..14a4c3e 100644 --- a/gneiss/tests/test_summary.py +++ b/gneiss/tests/test_summary.py @@ -55,5 +55,46 @@ def test_regression_results_pvalues(self): check_exact=False, check_less_precise=True) + def test_check_projection(self): + feature_names = ['Z1', 'Z2', 'Z3'] + basis = _gram_schmidt_basis(3) + res = RegressionResults(self.results, basis=basis, + feature_names=feature_names) + + feature_names = ['Z1', 'Z2', 'Z3'] + basis = _gram_schmidt_basis(3) + + # Test if feature_names is checked for + res = RegressionResults(self.results, basis=basis) + with self.assertRaises(ValueError): + res._check_projection(True) + + # Test if basis is checked for + res = RegressionResults(self.results, feature_names=feature_names) + with self.assertRaises(ValueError): + res._check_projection(True) + + def test_regression_results_coefficient(self): + exp_coef = pd.DataFrame({'Intercept' : [1.447368, -0.052632], + 'X' : [0.539474, 1.289474]}, + index=['Y1', 'Y2']) + res = RegressionResults(self.results) + pdt.assert_frame_equal(res.coefficients(), exp_coef, + check_exact=False, + check_less_precise=True) + + def test_regression_results_coefficient_projection(self): + exp_coef = pd.DataFrame({'Intercept' : ilr_inv(np.array([[1.447368, -0.052632]])), + 'X' : ilr_inv(np.array([[0.539474, 1.289474]]))}, + index=['Z1', 'Z2', 'Z3']) + feature_names = ['Z1', 'Z2', 'Z3'] + basis = _gram_schmidt_basis(3) + res = RegressionResults(self.results, basis=basis, + feature_names=feature_names) + + pdt.assert_frame_equal(res.coefficients(project=True), exp_coef, + check_exact=False, + check_less_precise=True) + if __name__ == "__main__": unittest.main() From 9d8e5bf3c04e7ec90f96d5181f3af9f6316af494 Mon Sep 17 00:00:00 2001 From: Jamie Morton Date: Thu, 28 Jul 2016 14:10:33 -0700 Subject: [PATCH 2/9] STY: Clean up looping --- gneiss/_summary.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gneiss/_summary.py b/gneiss/_summary.py index 583c576..f8767a6 100644 --- a/gneiss/_summary.py +++ b/gneiss/_summary.py @@ -101,9 +101,9 @@ def coefficients(self, project=False): self._check_projection(project) coef = pd.DataFrame() - for i in range(len(self.results)): - c = self.results[i].params - c.name = self.results[i].model.endog_names + for r in self.results: + c = r.params + c.name = r.model.endog_names coef = coef.append(c) if project: From c378361f946ffdb06956e36ed59f900123b0a4f6 Mon Sep 17 00:00:00 2001 From: Jamie Morton Date: Thu, 28 Jul 2016 14:13:14 -0700 Subject: [PATCH 3/9] Adding appropriate imports --- gneiss/_summary.py | 14 +++++++++++++- gneiss/tests/test_summary.py | 13 ++++++++----- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/gneiss/_summary.py b/gneiss/_summary.py index f8767a6..7ca84ba 100644 --- a/gneiss/_summary.py +++ b/gneiss/_summary.py @@ -8,6 +8,7 @@ # The full license is in the file COPYING.txt, distributed with this software. # ---------------------------------------------------------------------------- import pandas as pd +from skbio.stats.composition import ilr_inv class RegressionResults(): @@ -63,7 +64,9 @@ def _check_projection(self, project): Parameters ---------- project : bool - Specifies if a projection into the Aitchison simplex can be performed. + Specifies if a projection into the Aitchison simplex can be + performed. + Raises ------ ValueError: @@ -97,6 +100,15 @@ def coefficients(self, project=False): A table of values where columns are coefficients, and the index is either balances or proportions, depending on the value of `project`. + + Raises + ------ + ValueError: + Cannot perform projection into Aitchison simplex if `basis` + is not specified. + ValueError: + Cannot perform projection into Aitchison simplex + if `feature_names` is not specified. """ self._check_projection(project) coef = pd.DataFrame() diff --git a/gneiss/tests/test_summary.py b/gneiss/tests/test_summary.py index 14a4c3e..db56876 100644 --- a/gneiss/tests/test_summary.py +++ b/gneiss/tests/test_summary.py @@ -7,11 +7,13 @@ # # The full license is in the file COPYING.txt, distributed with this software. # ---------------------------------------------------------------------------- +import numpy as np import pandas as pd import pandas.util.testing as pdt import statsmodels.formula.api as smf import unittest from gneiss._summary import RegressionResults +from skbio.stats.composition import _gram_schmidt_basis, ilr_inv class TestRegressionResults(unittest.TestCase): @@ -75,8 +77,8 @@ def test_check_projection(self): res._check_projection(True) def test_regression_results_coefficient(self): - exp_coef = pd.DataFrame({'Intercept' : [1.447368, -0.052632], - 'X' : [0.539474, 1.289474]}, + exp_coef = pd.DataFrame({'Intercept': [1.447368, -0.052632], + 'X': [0.539474, 1.289474]}, index=['Y1', 'Y2']) res = RegressionResults(self.results) pdt.assert_frame_equal(res.coefficients(), exp_coef, @@ -84,9 +86,10 @@ def test_regression_results_coefficient(self): check_less_precise=True) def test_regression_results_coefficient_projection(self): - exp_coef = pd.DataFrame({'Intercept' : ilr_inv(np.array([[1.447368, -0.052632]])), - 'X' : ilr_inv(np.array([[0.539474, 1.289474]]))}, - index=['Z1', 'Z2', 'Z3']) + exp_coef = pd.DataFrame( + {'Intercept': ilr_inv(np.array([[1.447368, -0.052632]])), + 'X': ilr_inv(np.array([[0.539474, 1.289474]]))}, + index=['Z1', 'Z2', 'Z3']) feature_names = ['Z1', 'Z2', 'Z3'] basis = _gram_schmidt_basis(3) res = RegressionResults(self.results, basis=basis, From b12fe8aabacc2161a2aca4eefed72aceff3dba1b Mon Sep 17 00:00:00 2001 From: Jamie Morton Date: Thu, 28 Jul 2016 14:38:29 -0700 Subject: [PATCH 4/9] Adding test for ValueError --- gneiss/tests/test_summary.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/gneiss/tests/test_summary.py b/gneiss/tests/test_summary.py index db56876..fb72500 100644 --- a/gneiss/tests/test_summary.py +++ b/gneiss/tests/test_summary.py @@ -99,5 +99,13 @@ def test_regression_results_coefficient_projection(self): check_exact=False, check_less_precise=True) + def test_regression_results_coefficient_project_error(self): + exp_coef = pd.DataFrame({'Intercept' : [1.447368, -0.052632], + 'X' : [0.539474, 1.289474]}, + index=['Y1', 'Y2']) + res = RegressionResults(self.results) + with self.assertRaises(ValueError): + res.coefficients(project=True) + if __name__ == "__main__": unittest.main() From f3b7d9472d3eb4ebebe004b42141f66d20f9461a Mon Sep 17 00:00:00 2001 From: Jamie Morton Date: Thu, 28 Jul 2016 14:39:06 -0700 Subject: [PATCH 5/9] pep8 --- gneiss/tests/test_summary.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/gneiss/tests/test_summary.py b/gneiss/tests/test_summary.py index fb72500..ff4932d 100644 --- a/gneiss/tests/test_summary.py +++ b/gneiss/tests/test_summary.py @@ -100,9 +100,6 @@ def test_regression_results_coefficient_projection(self): check_less_precise=True) def test_regression_results_coefficient_project_error(self): - exp_coef = pd.DataFrame({'Intercept' : [1.447368, -0.052632], - 'X' : [0.539474, 1.289474]}, - index=['Y1', 'Y2']) res = RegressionResults(self.results) with self.assertRaises(ValueError): res.coefficients(project=True) From b3e11d636a0e61322dbd81f8559be12efac0c141 Mon Sep 17 00:00:00 2001 From: Jamie Morton Date: Sat, 30 Jul 2016 17:38:27 -0700 Subject: [PATCH 6/9] Addressing @antgonza's comments --- gneiss/_summary.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gneiss/_summary.py b/gneiss/_summary.py index 7ca84ba..5a2e719 100644 --- a/gneiss/_summary.py +++ b/gneiss/_summary.py @@ -122,8 +122,7 @@ def coefficients(self, project=False): # `check=True` due to type issue resolved here # https://github.com/biocore/scikit-bio/pull/1396 c = ilr_inv(coef.values.T, basis=self.basis, check=False).T - c = pd.DataFrame(c, index=self.feature_names, - columns=coef.columns) - return c + return pd.DataFrame(c, index=self.feature_names, + columns=coef.columns) else: return coef From 5cef94859278ddbf77a5548d74130cdc3a33f9bb Mon Sep 17 00:00:00 2001 From: Jamie Morton Date: Tue, 2 Aug 2016 12:32:09 -0400 Subject: [PATCH 7/9] Clarifying ilr_inv --- gneiss/_summary.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gneiss/_summary.py b/gneiss/_summary.py index 5a2e719..b135ce4 100644 --- a/gneiss/_summary.py +++ b/gneiss/_summary.py @@ -119,8 +119,10 @@ def coefficients(self, project=False): coef = coef.append(c) if project: - # `check=True` due to type issue resolved here - # https://github.com/biocore/scikit-bio/pull/1396 + # `check=False`, due to a problem with error handling + # address here https://github.com/biocore/scikit-bio/pull/1396 + # This will need to be fixed here: + # https://github.com/biocore/gneiss/issues/34 c = ilr_inv(coef.values.T, basis=self.basis, check=False).T return pd.DataFrame(c, index=self.feature_names, columns=coef.columns) From 88ac939594750799241e7f1ed7b33dc818f57b9d Mon Sep 17 00:00:00 2001 From: Jamie Morton Date: Tue, 2 Aug 2016 12:36:19 -0400 Subject: [PATCH 8/9] address -> addressed --- gneiss/_summary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gneiss/_summary.py b/gneiss/_summary.py index b135ce4..4ccbd84 100644 --- a/gneiss/_summary.py +++ b/gneiss/_summary.py @@ -120,7 +120,7 @@ def coefficients(self, project=False): if project: # `check=False`, due to a problem with error handling - # address here https://github.com/biocore/scikit-bio/pull/1396 + # addressed here https://github.com/biocore/scikit-bio/pull/1396 # This will need to be fixed here: # https://github.com/biocore/gneiss/issues/34 c = ilr_inv(coef.values.T, basis=self.basis, check=False).T From be4a07e531b2f6088b5bebdb8f1c12ba62921f4e Mon Sep 17 00:00:00 2001 From: Jamie Morton Date: Tue, 2 Aug 2016 12:40:08 -0400 Subject: [PATCH 9/9] removing trailing whitespace --- gneiss/_summary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gneiss/_summary.py b/gneiss/_summary.py index 4ccbd84..a39d504 100644 --- a/gneiss/_summary.py +++ b/gneiss/_summary.py @@ -121,7 +121,7 @@ def coefficients(self, project=False): if project: # `check=False`, due to a problem with error handling # addressed here https://github.com/biocore/scikit-bio/pull/1396 - # This will need to be fixed here: + # This will need to be fixed here: # https://github.com/biocore/gneiss/issues/34 c = ilr_inv(coef.values.T, basis=self.basis, check=False).T return pd.DataFrame(c, index=self.feature_names,