diff --git a/gneiss/_summary.py b/gneiss/_summary.py
index ed70f4d..a39d504 100644
--- a/gneiss/_summary.py
+++ b/gneiss/_summary.py
@@ -8,6 +8,7 @@
 # The full license is in the file COPYING.txt, distributed with this software.
 # ----------------------------------------------------------------------------
 import pandas as pd
+from skbio.stats.composition import ilr_inv
 
 
 class RegressionResults():
@@ -57,3 +58,79 @@ def __init__(self, stat_results,
         # calculate the overall coefficient of determination (i.e. R2)
         sst = sse + ssr
         self.r2 = 1 - sse / sst
+
+    def _check_projection(self, project):
+        """ Checks that a projection into the Aitchison simplex is possible.
+
+        Nothing is checked when `project` is False.
+
+        Parameters
+        ----------
+        project : bool
+           Specifies if a projection into the Aitchison simplex is
+           requested.
+
+        Raises
+        ------
+        ValueError:
+            Cannot perform projection into Aitchison simplex if `basis`
+            is not specified.
+        ValueError:
+            Cannot perform projection into Aitchison simplex
+            if `feature_names` is not specified.
+        """
+        if not project:
+            return
+
+        if self.basis is None:
+            raise ValueError("Cannot perform projection into Aitchison simplex"
+                             " if `basis` is not specified.")
+
+        if self.feature_names is None:
+            raise ValueError("Cannot perform projection into Aitchison simplex"
+                             " if `feature_names` is not specified.")
+
+    def coefficients(self, project=False):
+        """ Returns coefficients from fit.
+
+        Parameters
+        ----------
+        project : bool, optional
+           Specifies if coefficients should be projected back into
+           the Aitchison simplex. If false, the coefficients will be
+           represented as balances (default: False).
+
+        Returns
+        -------
+        pd.DataFrame
+            A table of values where columns are coefficients, and the index
+            is either balances or proportions, depending on the value of
+            `project`.
+
+        Raises
+        ------
+        ValueError:
+            Cannot perform projection into Aitchison simplex if `basis`
+            is not specified.
+        ValueError:
+            Cannot perform projection into Aitchison simplex
+            if `feature_names` is not specified.
+        """
+        self._check_projection(project)
+
+        # One row per fitted model, labelled with the name of its response
+        # variable. `rename` returns a copy, so the `params` Series held
+        # by each result object is left unmodified.
+        coef = pd.DataFrame([r.params.rename(r.model.endog_names)
+                             for r in self.results])
+
+        if not project:
+            return coef
+
+        # `check=False`, due to a problem with error handling
+        # addressed here https://github.com/biocore/scikit-bio/pull/1396
+        # This will need to be fixed here:
+        # https://github.com/biocore/gneiss/issues/34
+        c = ilr_inv(coef.values.T, basis=self.basis, check=False).T
+        return pd.DataFrame(c, index=self.feature_names,
+                            columns=coef.columns)
diff --git a/gneiss/tests/test_summary.py b/gneiss/tests/test_summary.py
index dbd2cdf..ff4932d 100644
--- a/gneiss/tests/test_summary.py
+++ b/gneiss/tests/test_summary.py
@@ -7,11 +7,13 @@
 #
 # The full license is in the file COPYING.txt, distributed with this software.
 # ----------------------------------------------------------------------------
+import numpy as np
 import pandas as pd
 import pandas.util.testing as pdt
 import statsmodels.formula.api as smf
 import unittest
 from gneiss._summary import RegressionResults
+from skbio.stats.composition import _gram_schmidt_basis, ilr_inv
 
 
 class TestRegressionResults(unittest.TestCase):
@@ -55,5 +57,55 @@ def test_regression_results_pvalues(self):
                                check_exact=False,
                                check_less_precise=True)
 
+    def test_check_projection(self):
+        feature_names = ['Z1', 'Z2', 'Z3']
+        basis = _gram_schmidt_basis(3)
+
+        # No error when both basis and feature_names are specified
+        res = RegressionResults(self.results, basis=basis,
+                                feature_names=feature_names)
+        res._check_projection(True)
+
+        # Test if feature_names is checked for
+        res = RegressionResults(self.results, basis=basis)
+        with self.assertRaises(ValueError):
+            res._check_projection(True)
+
+        # Test if basis is checked for
+        res = RegressionResults(self.results, feature_names=feature_names)
+        with self.assertRaises(ValueError):
+            res._check_projection(True)
+
+        # Nothing is required when no projection is requested
+        RegressionResults(self.results)._check_projection(False)
+
+    def test_regression_results_coefficient(self):
+        exp_coef = pd.DataFrame({'Intercept': [1.447368, -0.052632],
+                                 'X': [0.539474, 1.289474]},
+                                index=['Y1', 'Y2'])
+        res = RegressionResults(self.results)
+        pdt.assert_frame_equal(res.coefficients(), exp_coef,
+                               check_exact=False,
+                               check_less_precise=True)
+
+    def test_regression_results_coefficient_projection(self):
+        exp_coef = pd.DataFrame(
+            {'Intercept': ilr_inv(np.array([[1.447368, -0.052632]])),
+             'X': ilr_inv(np.array([[0.539474, 1.289474]]))},
+            index=['Z1', 'Z2', 'Z3'])
+        feature_names = ['Z1', 'Z2', 'Z3']
+        basis = _gram_schmidt_basis(3)
+        res = RegressionResults(self.results, basis=basis,
+                                feature_names=feature_names)
+
+        pdt.assert_frame_equal(res.coefficients(project=True), exp_coef,
+                               check_exact=False,
+                               check_less_precise=True)
+
+    def test_regression_results_coefficient_project_error(self):
+        res = RegressionResults(self.results)
+        with self.assertRaises(ValueError):
+            res.coefficients(project=True)
+
 if __name__ == "__main__":
     unittest.main()