From ce93606563f699f24e2a94a50df54c43e693e562 Mon Sep 17 00:00:00 2001 From: Jamie Morton Date: Thu, 28 Jul 2016 14:36:27 -0700 Subject: [PATCH 1/8] Adding in predict and residual querying --- gneiss/_summary.py | 64 ++++++++++++++++++++++++++++++++++++ gneiss/tests/test_summary.py | 40 ++++++++++++++++++++++ 2 files changed, 104 insertions(+) diff --git a/gneiss/_summary.py b/gneiss/_summary.py index 7ca84ba..b09730c 100644 --- a/gneiss/_summary.py +++ b/gneiss/_summary.py @@ -127,3 +127,67 @@ def coefficients(self, project=False): return c else: return coef + + def residuals(self, project=False): + """ Returns calculated residuals. + Parameters + ---------- + X : pd.DataFrame, optional + Input table of covariates. If not specified, then the + fitted values calculated from training the model will be + returned. + project : bool, optional + Specifies if coefficients should be projected back into + the Aitchison simplex. If false, the coefficients will be + represented as balances (default: False). + Returns + ------- + pd.DataFrame + A table of values where rows are coefficients, and the columns + are either balances or proportions, depending on the value of + `project`. + """ + self._check_projection(project) + + resid = pd.DataFrame() + + for i in range(len(self.results)): + err = self.results[i].resid + err.name = self.results[i].model.endog_names + resid = resid.append(err) + + if project: + # check=True due to type issue resolved here + # https://github.com/biocore/scikit-bio/pull/1396 + proj_resid = ilr_inv(resid.values.T, basis=self.basis, + check=False).T + proj_resid = pd.DataFrame(proj_resid, index=self.feature_names, + columns=resid.columns) + return proj_resid + else: + return resid + + def predict(self, X, project=False): + """ Performs a prediction based on model. + + Parameters + ---------- + X : pd.DataFrame, optional + Input table of covariates. If not specified, then the + fitted values calculated from training the model will be + returned. + project : bool, optional + Specifies if coefficients should be projected back into + the Aitchison simplex. If false, the coefficients will be + represented as balances (default: False). + + Returns + ------- + pd.DataFrame + A table of values where rows are coefficients, and the columns + are either balances or proportions, depending on the value of + `project`. + """ + self._check_projection(project) + + pass diff --git a/gneiss/tests/test_summary.py b/gneiss/tests/test_summary.py index db56876..284fe3f 100644 --- a/gneiss/tests/test_summary.py +++ b/gneiss/tests/test_summary.py @@ -99,5 +99,45 @@ def test_regression_results_coefficient_projection(self): check_exact=False, check_less_precise=True) + def test_regression_results_residuals_projection(self): + # aliasing np.array for the sake of pep8 + A = np.array + exp_resid = pd.DataFrame({'s1': ilr_inv(A([-0.986842, -0.236842])), + 's2': ilr_inv(A([-0.065789, -1.815789])), + 's3': ilr_inv(A([1.473684, 0.473684])), + 's4': ilr_inv(A([1.394737, -1.105263])), + 's5': ilr_inv(A([-1.065789, 1.184211])), + 's6': ilr_inv(A([-1.144737, -0.394737])), + 's7': ilr_inv(A([0.394737, 1.894737]))}, + index=['Z1', 'Z2', 'Z3']) + feature_names = ['Z1', 'Z2', 'Z3'] + basis = _gram_schmidt_basis(3) + res = RegressionResults(self.results, basis=basis, + feature_names=feature_names) + pdt.assert_frame_equal(res.residuals(project=True), exp_resid, + check_exact=False, + check_less_precise=True) + + def test_regression_results_residuals(self): + exp_resid = pd.DataFrame({'s1': [-0.986842, -0.236842], + 's2': [-0.065789, -1.815789], + 's3': [1.473684, 0.473684], + 's4': [1.394737, -1.105263], + 's5': [-1.065789, 1.184211], + 's6': [-1.144737, -0.394737], + 's7': [0.394737, 1.894737]}, + index=['Y1', 'Y2']) + res = RegressionResults(self.results) + pdt.assert_frame_equal(res.residuals(), exp_resid, + check_exact=False, + check_less_precise=True) + + def test_regression_results_predict(self): + pass + + def test_regression_results_predict_projection(self): + pass + + if __name__ == "__main__": unittest.main() From 8233fe5b31b9e9936ad23c432dcf80b5dca35399 Mon Sep 17 00:00:00 2001 From: Jamie Morton Date: Sat, 30 Jul 2016 12:56:19 -0700 Subject: [PATCH 2/8] Adding prediction feature --- gneiss/_summary.py | 67 ++++++++++++++++++++++------ gneiss/tests/test_summary.py | 84 +++++++++++++++++++++++++++++++----- 2 files changed, 129 insertions(+), 22 deletions(-) diff --git a/gneiss/_summary.py b/gneiss/_summary.py index b09730c..5ef83b2 100644 --- a/gneiss/_summary.py +++ b/gneiss/_summary.py @@ -15,10 +15,11 @@ class RegressionResults(): """ Summary object for storing regression results. """ - def __init__(self, stat_results, + def __init__(self, + stat_results, feature_names=None, basis=None): - """ Reorganizes statsmodels regression modules. + """ Reorganizes statsmodels regression results modules. Accepts a list of statsmodels RegressionResults objects and performs some addition summary statistics. @@ -78,11 +79,11 @@ def _check_projection(self, project): """ if self.basis is None and project: raise ValueError("Cannot perform projection into Aitchison simplex" - "if `basis` is not specified.") + " if `basis` is not specified.") if self.feature_names is None and project: raise ValueError("Cannot perform projection into Aitchison simplex" - "if `feature_names` is not specified.") + " if `feature_names` is not specified.") def coefficients(self, project=False): """ Returns coefficients from fit. @@ -130,6 +131,7 @@ def coefficients(self, project=False): def residuals(self, project=False): """ Returns calculated residuals. + Parameters ---------- X : pd.DataFrame, optional @@ -151,9 +153,9 @@ def residuals(self, project=False): resid = pd.DataFrame() - for i in range(len(self.results)): - err = self.results[i].resid - err.name = self.results[i].model.endog_names + for r in self.results: + err = r.resid + err.name = r.model.endog_names resid = resid.append(err) if project: @@ -167,19 +169,21 @@ def residuals(self, project=False): else: return resid - def predict(self, X, project=False): + def predict(self, X=None, project=False, **kwargs): """ Performs a prediction based on model. Parameters ---------- X : pd.DataFrame, optional - Input table of covariates. If not specified, then the - fitted values calculated from training the model will be - returned. + Input table of covariates, where columns are covariates, and + rows are samples. If not specified, then the fitted values + calculated from training the model will be returned. project : bool, optional Specifies if coefficients should be projected back into the Aitchison simplex. If false, the coefficients will be represented as balances (default: False). + **kwargs : dict + Other arguments to be passed into the model prediction. Returns ------- @@ -187,7 +191,46 @@ def predict(self, X, project=False): A table of values where rows are coefficients, and the columns are either balances or proportions, depending on the value of `project`. + + Examples + -------- + >>> import pandas as pd + >>> from gneiss._model import RegressionModel + >>> data = pd.DataFrame([[1, 1, 1], + ... [3, 2, 3], + ... [4, 3, 2], + ... [5, 4, 4], + ... [2, 5, 3], + ... [3, 6, 5], + ... [4, 7, 4]], + ... index=['s1', 's2', 's3', 's4', + ... 's5', 's6', 's7'], + ... columns=['Y1', 'Y2', 'X']) + >>> model = RegressionResults([smf.ols(formula="Y1 ~ X", data=data).fit(), + ... smf.ols(formula="Y2 ~ X", data=data).fit()]) + >>> model.predict(data['X']) """ self._check_projection(project) - pass + prediction = pd.DataFrame() + for m in self.results: + # check if X is none. + p = pd.Series(m.predict(X, **kwargs)) + p.name = m.model.endog_names + if X is not None: + p.index = X.index + else: + p.index = m.fittedvalues.index + prediction = prediction.append(p) + + if project: + # check=True due to type issue resolved here + # https://github.com/biocore/scikit-bio/pull/1396 + proj_prediction = ilr_inv(prediction.values.T, basis=self.basis, + check=False) + proj_prediction = pd.DataFrame(proj_prediction, + columns=self.feature_names, + index=prediction.columns) + return proj_prediction + + return prediction.T diff --git a/gneiss/tests/test_summary.py b/gneiss/tests/test_summary.py index 284fe3f..9d49f05 100644 --- a/gneiss/tests/test_summary.py +++ b/gneiss/tests/test_summary.py @@ -19,12 +19,16 @@ class TestRegressionResults(unittest.TestCase): def setUp(self): - self.data = pd.DataFrame([[1, 3, 4, 5, 2, 3, 4], - list(range(1, 8)), - [1, 3, 2, 4, 3, 5, 4]], - columns=['s1', 's2', 's3', 's4', + self.data = pd.DataFrame([[1, 1, 1], + [3, 2, 3], + [4, 3, 2], + [5, 4, 4], + [2, 5, 3], + [3, 6, 5], + [4, 7, 4]], + index=['s1', 's2', 's3', 's4', 's5', 's6', 's7'], - index=['Y1', 'Y2', 'X']).T + columns=['Y1', 'Y2', 'X']) model1 = smf.ols(formula="Y1 ~ X", data=self.data) model2 = smf.ols(formula="Y2 ~ X", data=self.data) self.results = [model1.fit(), model2.fit()] @@ -99,9 +103,13 @@ def test_regression_results_coefficient_projection(self): check_exact=False, check_less_precise=True) + def test_regression_results_coefficient_project_error(self): + res = RegressionResults(self.results) + with self.assertRaises(ValueError): + res.coefficients(project=True) + def test_regression_results_residuals_projection(self): - # aliasing np.array for the sake of pep8 - A = np.array + A = np.array # aliasing np.array for the sake of pep8 exp_resid = pd.DataFrame({'s1': ilr_inv(A([-0.986842, -0.236842])), 's2': ilr_inv(A([-0.065789, -1.815789])), 's3': ilr_inv(A([1.473684, 0.473684])), @@ -133,11 +141,67 @@ def test_regression_results_residuals(self): check_less_precise=True) def test_regression_results_predict(self): - pass + model = RegressionResults(self.results) + res_predict = model.predict(self.data[['X']]) - def test_regression_results_predict_projection(self): - pass + exp_predict = pd.DataFrame({'s1': [1.986842, 1.236842], + 's2': [3.065789, 3.815789], + 's3': [2.526316, 2.526316], + 's4': [3.605263, 5.105263], + 's5': [3.065789, 3.815789], + 's6': [4.144737, 6.394737], + 's7': [3.605263, 5.105263]}, + index=['Y1', 'Y2']).T + + pdt.assert_frame_equal(res_predict, exp_predict) + + def test_regression_results_predict_extrapolate(self): + model = RegressionResults(self.results) + extrapolate = pd.DataFrame({'X': [8, 9, 10]}, + index = ['k1', 'k2', 'k3']) + res_predict = model.predict(extrapolate) + + exp_predict = pd.DataFrame({'k1': [5.76315789, 10.26315789], + 'k2': [6.30263158, 11.55263158], + 'k3': [6.84210526, 12.84210526]}, + index=['Y1', 'Y2']).T + + pdt.assert_frame_equal(res_predict, exp_predict) + + def test_regression_results_predict_projection(self): + feature_names = ['Z1', 'Z2', 'Z3'] + basis = _gram_schmidt_basis(3) + model = RegressionResults(self.results, basis=basis, + feature_names=feature_names) + + res_predict = model.predict(self.data[['X']], project=True) + A = np.array # aliasing np.array for the sake of pep8 + exp_predict = pd.DataFrame({'s1': ilr_inv(A([1.986842, 1.236842])), + 's2': ilr_inv(A([3.065789, 3.815789])), + 's3': ilr_inv(A([2.526316, 2.526316])), + 's4': ilr_inv(A([3.605263, 5.105263])), + 's5': ilr_inv(A([3.065789, 3.815789])), + 's6': ilr_inv(A([4.144737, 6.394737])), + 's7': ilr_inv(A([3.605263, 5.105263]))}, + index=feature_names).T + + pdt.assert_frame_equal(res_predict, exp_predict) + + def test_regression_results_predict_none(self): + model = RegressionResults(self.results) + res_predict = model.predict() + + exp_predict = pd.DataFrame({'s1': [1.986842, 1.236842], + 's2': [3.065789, 3.815789], + 's3': [2.526316, 2.526316], + 's4': [3.605263, 5.105263], + 's5': [3.065789, 3.815789], + 's6': [4.144737, 6.394737], + 's7': [3.605263, 5.105263]}, + index=['Y1', 'Y2']).T + + pdt.assert_frame_equal(res_predict, exp_predict) if __name__ == "__main__": unittest.main() From 51aef4cf334d9e683f13530fce760fcc304ed272 Mon Sep 17 00:00:00 2001 From: Jamie Morton Date: Sat, 30 Jul 2016 12:59:46 -0700 Subject: [PATCH 3/8] pep8 --- gneiss/_summary.py | 18 ------------------ gneiss/tests/test_summary.py | 21 ++++++++++----------- ipynb/balance_trees.ipynb | 2 +- 3 files changed, 11 insertions(+), 30 deletions(-) diff --git a/gneiss/_summary.py b/gneiss/_summary.py index 5ef83b2..89bfd3d 100644 --- a/gneiss/_summary.py +++ b/gneiss/_summary.py @@ -191,24 +191,6 @@ def predict(self, X=None, project=False, **kwargs): A table of values where rows are coefficients, and the columns are either balances or proportions, depending on the value of `project`. - - Examples - -------- - >>> import pandas as pd - >>> from gneiss._model import RegressionModel - >>> data = pd.DataFrame([[1, 1, 1], - ... [3, 2, 3], - ... [4, 3, 2], - ... [5, 4, 4], - ... [2, 5, 3], - ... [3, 6, 5], - ... [4, 7, 4]], - ... index=['s1', 's2', 's3', 's4', - ... 's5', 's6', 's7'], - ... columns=['Y1', 'Y2', 'X']) - >>> model = RegressionResults([smf.ols(formula="Y1 ~ X", data=data).fit(), - ... smf.ols(formula="Y2 ~ X", data=data).fit()]) - >>> model.predict(data['X']) """ self._check_projection(project) diff --git a/gneiss/tests/test_summary.py b/gneiss/tests/test_summary.py index 9d49f05..ef9e15d 100644 --- a/gneiss/tests/test_summary.py +++ b/gneiss/tests/test_summary.py @@ -27,7 +27,7 @@ def setUp(self): [3, 6, 5], [4, 7, 4]], index=['s1', 's2', 's3', 's4', - 's5', 's6', 's7'], + 's5', 's6', 's7'], columns=['Y1', 'Y2', 'X']) model1 = smf.ols(formula="Y1 ~ X", data=self.data) model2 = smf.ols(formula="Y2 ~ X", data=self.data) @@ -109,14 +109,14 @@ def test_regression_results_coefficient_project_error(self): res.coefficients(project=True) def test_regression_results_residuals_projection(self): - A = np.array # aliasing np.array for the sake of pep8 + A = np.array # aliasing np.array for the sake of pep8 exp_resid = pd.DataFrame({'s1': ilr_inv(A([-0.986842, -0.236842])), 's2': ilr_inv(A([-0.065789, -1.815789])), - 's3': ilr_inv(A([1.473684, 0.473684])), + 's3': ilr_inv(A([1.473684, 0.473684])), 's4': ilr_inv(A([1.394737, -1.105263])), - 's5': ilr_inv(A([-1.065789, 1.184211])), + 's5': ilr_inv(A([-1.065789, 1.184211])), 's6': ilr_inv(A([-1.144737, -0.394737])), - 's7': ilr_inv(A([0.394737, 1.894737]))}, + 's7': ilr_inv(A([0.394737, 1.894737]))}, index=['Z1', 'Z2', 'Z3']) feature_names = ['Z1', 'Z2', 'Z3'] basis = _gram_schmidt_basis(3) @@ -129,11 +129,11 @@ def test_regression_results_residuals_projection(self): def test_regression_results_residuals(self): exp_resid = pd.DataFrame({'s1': [-0.986842, -0.236842], 's2': [-0.065789, -1.815789], - 's3': [1.473684, 0.473684], + 's3': [1.473684, 0.473684], 's4': [1.394737, -1.105263], - 's5': [-1.065789, 1.184211], + 's5': [-1.065789, 1.184211], 's6': [-1.144737, -0.394737], - 's7': [0.394737, 1.894737]}, + 's7': [0.394737, 1.894737]}, index=['Y1', 'Y2']) res = RegressionResults(self.results) pdt.assert_frame_equal(res.residuals(), exp_resid, @@ -158,8 +158,7 @@ def test_regression_results_predict(self): def test_regression_results_predict_extrapolate(self): model = RegressionResults(self.results) extrapolate = pd.DataFrame({'X': [8, 9, 10]}, - index = ['k1', 'k2', 'k3']) - + index=['k1', 'k2', 'k3']) res_predict = model.predict(extrapolate) exp_predict = pd.DataFrame({'k1': [5.76315789, 10.26315789], @@ -176,7 +175,7 @@ def test_regression_results_predict_projection(self): feature_names=feature_names) res_predict = model.predict(self.data[['X']], project=True) - A = np.array # aliasing np.array for the sake of pep8 + A = np.array # aliasing np.array for the sake of pep8 exp_predict = pd.DataFrame({'s1': ilr_inv(A([1.986842, 1.236842])), 's2': ilr_inv(A([3.065789, 3.815789])), 's3': ilr_inv(A([2.526316, 2.526316])), diff --git a/ipynb/balance_trees.ipynb b/ipynb/balance_trees.ipynb index 13d41be..16fcd27 100644 --- a/ipynb/balance_trees.ipynb +++ b/ipynb/balance_trees.ipynb @@ -459,7 +459,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.1" + "version": "3.5.2" } }, "nbformat": 4, From 6cad36bbaf7e6f985368298a81fe909389f3f9ed Mon Sep 17 00:00:00 2001 From: Jamie Morton Date: Sat, 30 Jul 2016 18:04:29 -0700 Subject: [PATCH 4/8] Fixing orientation of residuals --- gneiss/_summary.py | 9 ++++----- gneiss/tests/test_summary.py | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/gneiss/_summary.py b/gneiss/_summary.py index 89bfd3d..768040e 100644 --- a/gneiss/_summary.py +++ b/gneiss/_summary.py @@ -145,7 +145,7 @@ def residuals(self, project=False): Returns ------- pd.DataFrame - A table of values where rows are coefficients, and the columns + A table of values where rows are samples, and the columns are either balances or proportions, depending on the value of `project`. """ @@ -163,11 +163,10 @@ def residuals(self, project=False): # https://github.com/biocore/scikit-bio/pull/1396 proj_resid = ilr_inv(resid.values.T, basis=self.basis, check=False).T - proj_resid = pd.DataFrame(proj_resid, index=self.feature_names, - columns=resid.columns) - return proj_resid + return pd.DataFrame(proj_resid, index=self.feature_names, + columns=resid.columns).T else: - return resid + return resid.T def predict(self, X=None, project=False, **kwargs): """ Performs a prediction based on model. diff --git a/gneiss/tests/test_summary.py b/gneiss/tests/test_summary.py index ef9e15d..1b11c37 100644 --- a/gneiss/tests/test_summary.py +++ b/gneiss/tests/test_summary.py @@ -117,7 +117,7 @@ def test_regression_results_residuals_projection(self): 's5': ilr_inv(A([-1.065789, 1.184211])), 's6': ilr_inv(A([-1.144737, -0.394737])), 's7': ilr_inv(A([0.394737, 1.894737]))}, - index=['Z1', 'Z2', 'Z3']) + index=['Z1', 'Z2', 'Z3']).T feature_names = ['Z1', 'Z2', 'Z3'] basis = _gram_schmidt_basis(3) res = RegressionResults(self.results, basis=basis, @@ -134,7 +134,7 @@ def test_regression_results_residuals(self): 's5': [-1.065789, 1.184211], 's6': [-1.144737, -0.394737], 's7': [0.394737, 1.894737]}, - index=['Y1', 'Y2']) + index=['Y1', 'Y2']).T res = RegressionResults(self.results) pdt.assert_frame_equal(res.residuals(), exp_resid, check_exact=False, From 501265e145647f92d646183da81aedfbb58fd104 Mon Sep 17 00:00:00 2001 From: Jamie Morton Date: Sat, 30 Jul 2016 18:05:32 -0700 Subject: [PATCH 5/8] pep8 --- gneiss/_summary.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/gneiss/_summary.py b/gneiss/_summary.py index 768040e..baff002 100644 --- a/gneiss/_summary.py +++ b/gneiss/_summary.py @@ -163,8 +163,8 @@ def residuals(self, project=False): # https://github.com/biocore/scikit-bio/pull/1396 proj_resid = ilr_inv(resid.values.T, basis=self.basis, check=False).T - return pd.DataFrame(proj_resid, index=self.feature_names, - columns=resid.columns).T + return pd.DataFrame(proj_resid, index=self.feature_names, + columns=resid.columns).T else: return resid.T @@ -209,9 +209,7 @@ def predict(self, X=None, project=False, **kwargs): # https://github.com/biocore/scikit-bio/pull/1396 proj_prediction = ilr_inv(prediction.values.T, basis=self.basis, check=False) - proj_prediction = pd.DataFrame(proj_prediction, - columns=self.feature_names, - index=prediction.columns) - return proj_prediction - + return pd.DataFrame(proj_prediction, + columns=self.feature_names, + index=prediction.columns) return prediction.T From 92bba97a9174f7225aba7bea13326e923bd907ba Mon Sep 17 00:00:00 2001 From: Jamie Morton Date: Tue, 2 Aug 2016 13:30:51 -0400 Subject: [PATCH 6/8] Adding comments for further clarification on project --- gneiss/_summary.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/gneiss/_summary.py b/gneiss/_summary.py index db33cde..b056a22 100644 --- a/gneiss/_summary.py +++ b/gneiss/_summary.py @@ -160,8 +160,10 @@ def residuals(self, project=False): resid = resid.append(err) if project: - # check=True due to type issue resolved here - # https://github.com/biocore/scikit-bio/pull/1396 + # `check=False`, due to a problem with error handling + # addressed here https://github.com/biocore/scikit-bio/pull/1396 + # This will need to be fixed here: + # https://github.com/biocore/gneiss/issues/34 proj_resid = ilr_inv(resid.values.T, basis=self.basis, check=False).T return pd.DataFrame(proj_resid, index=self.feature_names, @@ -206,8 +208,10 @@ def predict(self, X=None, project=False, **kwargs): prediction = prediction.append(p) if project: - # check=True due to type issue resolved here - # https://github.com/biocore/scikit-bio/pull/1396 + # `check=False`, due to a problem with error handling + # addressed here https://github.com/biocore/scikit-bio/pull/1396 + # This will need to be fixed here: + # https://github.com/biocore/gneiss/issues/34 proj_prediction = ilr_inv(prediction.values.T, basis=self.basis, check=False) return pd.DataFrame(proj_prediction, From b0dfdff44a890e78edb296cf637b3994ce93d08f Mon Sep 17 00:00:00 2001 From: Jamie Morton Date: Tue, 2 Aug 2016 13:36:40 -0400 Subject: [PATCH 7/8] TST: Adding in tests for residual and predict --- gneiss/tests/test_summary.py | 95 ++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/gneiss/tests/test_summary.py b/gneiss/tests/test_summary.py index fc246d2..84f6523 100644 --- a/gneiss/tests/test_summary.py +++ b/gneiss/tests/test_summary.py @@ -108,5 +108,100 @@ def test_regression_results_coefficient_project_error(self): with self.assertRaises(ValueError): res.coefficients(project=True) + def test_regression_results_residuals_projection(self): + A = np.array # aliasing np.array for the sake of pep8 + exp_resid = pd.DataFrame({'s1': ilr_inv(A([-0.986842, -0.236842])), + 's2': ilr_inv(A([-0.065789, -1.815789])), + 's3': ilr_inv(A([1.473684, 0.473684])), + 's4': ilr_inv(A([1.394737, -1.105263])), + 's5': ilr_inv(A([-1.065789, 1.184211])), + 's6': ilr_inv(A([-1.144737, -0.394737])), + 's7': ilr_inv(A([0.394737, 1.894737]))}, + index=['Z1', 'Z2', 'Z3']).T + feature_names = ['Z1', 'Z2', 'Z3'] + basis = _gram_schmidt_basis(3) + res = RegressionResults(self.results, basis=basis, + feature_names=feature_names) + pdt.assert_frame_equal(res.residuals(project=True), exp_resid, + check_exact=False, + check_less_precise=True) + + def test_regression_results_residuals(self): + exp_resid = pd.DataFrame({'s1': [-0.986842, -0.236842], + 's2': [-0.065789, -1.815789], + 's3': [1.473684, 0.473684], + 's4': [1.394737, -1.105263], + 's5': [-1.065789, 1.184211], + 's6': [-1.144737, -0.394737], + 's7': [0.394737, 1.894737]}, + index=['Y1', 'Y2']).T + res = RegressionResults(self.results) + pdt.assert_frame_equal(res.residuals(), exp_resid, + check_exact=False, + check_less_precise=True) + + def test_regression_results_predict(self): + model = RegressionResults(self.results) + res_predict = model.predict(self.data[['X']]) + + exp_predict = pd.DataFrame({'s1': [1.986842, 1.236842], + 's2': [3.065789, 3.815789], + 's3': [2.526316, 2.526316], + 's4': [3.605263, 5.105263], + 's5': [3.065789, 3.815789], + 's6': [4.144737, 6.394737], + 's7': [3.605263, 5.105263]}, + index=['Y1', 'Y2']).T + + pdt.assert_frame_equal(res_predict, exp_predict) + + def test_regression_results_predict_extrapolate(self): + model = RegressionResults(self.results) + extrapolate = pd.DataFrame({'X': [8, 9, 10]}, + index=['k1', 'k2', 'k3']) + res_predict = model.predict(extrapolate) + + exp_predict = pd.DataFrame({'k1': [5.76315789, 10.26315789], + 'k2': [6.30263158, 11.55263158], + 'k3': [6.84210526, 12.84210526]}, + index=['Y1', 'Y2']).T + + pdt.assert_frame_equal(res_predict, exp_predict) + + def test_regression_results_predict_projection(self): + feature_names = ['Z1', 'Z2', 'Z3'] + basis = _gram_schmidt_basis(3) + model = RegressionResults(self.results, basis=basis, + feature_names=feature_names) + + res_predict = model.predict(self.data[['X']], project=True) + A = np.array # aliasing np.array for the sake of pep8 + exp_predict = pd.DataFrame({'s1': ilr_inv(A([1.986842, 1.236842])), + 's2': ilr_inv(A([3.065789, 3.815789])), + 's3': ilr_inv(A([2.526316, 2.526316])), + 's4': ilr_inv(A([3.605263, 5.105263])), + 's5': ilr_inv(A([3.065789, 3.815789])), + 's6': ilr_inv(A([4.144737, 6.394737])), + 's7': ilr_inv(A([3.605263, 5.105263]))}, + index=feature_names).T + + pdt.assert_frame_equal(res_predict, exp_predict) + + def test_regression_results_predict_none(self): + model = RegressionResults(self.results) + res_predict = model.predict() + + exp_predict = pd.DataFrame({'s1': [1.986842, 1.236842], + 's2': [3.065789, 3.815789], + 's3': [2.526316, 2.526316], + 's4': [3.605263, 5.105263], + 's5': [3.065789, 3.815789], + 's6': [4.144737, 6.394737], + 's7': [3.605263, 5.105263]}, + index=['Y1', 'Y2']).T + + pdt.assert_frame_equal(res_predict, exp_predict) + + if __name__ == "__main__": unittest.main() From 855078e0397361fc959a7995dc3832922662b6c1 Mon Sep 17 00:00:00 2001 From: Jamie Morton Date: Tue, 9 Aug 2016 11:21:35 -0400 Subject: [PATCH 8/8] Addressing @josenava's comments --- gneiss/_summary.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/gneiss/_summary.py b/gneiss/_summary.py index b056a22..2bbf326 100644 --- a/gneiss/_summary.py +++ b/gneiss/_summary.py @@ -92,7 +92,7 @@ def coefficients(self, project=False): ---------- project : bool, optional Specifies if coefficients should be projected back into - the Aitchison simplex. If false, the coefficients will be + the Aitchison simplex [1]_. If false, the coefficients will be represented as balances (default: False). Returns @@ -110,6 +110,11 @@ def coefficients(self, project=False): ValueError: Cannot perform projection into Aitchison simplex if `feature_names` is not specified. + + References + ---------- + .. [1] Aitchison, J. "A concise guide to compositional data analysis, + CDA work." Girona 24 (2003): 73-81. """ self._check_projection(project) coef = pd.DataFrame() @@ -141,7 +146,7 @@ def residuals(self, project=False): returned. project : bool, optional Specifies if coefficients should be projected back into - the Aitchison simplex. If false, the coefficients will be + the Aitchison simplex [1]_. If false, the coefficients will be represented as balances (default: False). Returns ------- @@ -149,6 +154,11 @@ def residuals(self, project=False): A table of values where rows are samples, and the columns are either balances or proportions, depending on the value of `project`. + + References + ---------- + .. [1] Aitchison, J. "A concise guide to compositional data analysis, + CDA work." Girona 24 (2003): 73-81. """ self._check_projection(project) @@ -182,7 +192,7 @@ def predict(self, X=None, project=False, **kwargs): calculated from training the model will be returned. project : bool, optional Specifies if coefficients should be projected back into - the Aitchison simplex. If false, the coefficients will be + the Aitchison simplex [1]_. If false, the coefficients will be represented as balances (default: False). **kwargs : dict Other arguments to be passed into the model prediction. @@ -193,6 +203,11 @@ def predict(self, X=None, project=False, **kwargs): A table of values where rows are coefficients, and the columns are either balances or proportions, depending on the value of `project`. + + References + ---------- + .. [1] Aitchison, J. "A concise guide to compositional data analysis, + CDA work." Girona 24 (2003): 73-81. """ self._check_projection(project)