forked from MESMER-group/mesmer
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add tests for Power Transformer (MESMER-group#430)
* add tests for power transformer * add comments * test numerical stability --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Mathias Hauser <mathause@users.noreply.github.com>
- Loading branch information
1 parent
74e070b
commit 0ff1cc5
Showing
1 changed file
with
153 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
import numpy as np | ||
import pytest | ||
import scipy as sp | ||
|
||
from mesmer.mesmer_m.power_transformer import ( | ||
PowerTransformerVariableLambda, | ||
lambda_function, | ||
) | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"coeffs, t, expected", | ||
[ | ||
([1, 0.1], -np.inf, 2.0), | ||
([1, 0.1], np.inf, 0.0), | ||
([1, -0.1], -np.inf, 0.0), | ||
([1, -0.1], np.inf, 2.0), | ||
([0, 0], 1, 2), | ||
([0, 1], 1, 2), | ||
([1, 0], 1, 1), | ||
([2, 0], 1, 2 / 3), | ||
([1, 1], np.log(9), 2 / 10), | ||
], | ||
) | ||
def test_lambda_function(coeffs, t, expected): | ||
|
||
result = lambda_function(coeffs, t) | ||
np.testing.assert_allclose(result, expected) | ||
|
||
|
||
def test_fit_power_transformer(): | ||
# with enough random normal data points the fit should be close to 1 and 0 | ||
np.random.seed(0) | ||
gridcells = 1 | ||
n_months = 100_000 | ||
monthly_residuals = np.random.standard_normal((n_months, gridcells)) * 10 | ||
yearly_T = np.ones((n_months, gridcells)) | ||
|
||
pt = PowerTransformerVariableLambda(standardize=False) | ||
# standardize false speeds up the fit and does not impact the coefficients | ||
pt.fit(monthly_residuals, yearly_T, gridcells) | ||
|
||
result = pt.coeffs_ | ||
# to test viability | ||
expected = np.array([[1, 0]]) | ||
np.testing.assert_allclose(result, expected, atol=1e-2) | ||
|
||
# to test numerical stability | ||
expected_exact = np.array([[9.976913e-01, -1.998520e-05]]) | ||
np.testing.assert_allclose(result, expected_exact, atol=1e-7) | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"skew, bounds", | ||
[ | ||
(-2, [1, 2]), # left skewed data | ||
(2, [0, 1]), # right skewed data | ||
(-5, [1, 2]), # more skew | ||
(5, [0, 1]), # more skew | ||
(-0.5, [1, 2]), # less skew | ||
(0.5, [0, 1]), # less skew | ||
(0, [0.9, 1.1]), # no skew | ||
], | ||
) | ||
def test_yeo_johnson_optimize_lambda(skew, bounds): | ||
np.random.seed(0) | ||
n_years = 100_000 | ||
|
||
yearly_T = np.random.randn(n_years) | ||
local_monthly_residuals = sp.stats.skewnorm.rvs(skew, size=n_years) | ||
|
||
pt = PowerTransformerVariableLambda(standardize=False) | ||
pt.coeffs_ = pt._yeo_johnson_optimize_lambda(local_monthly_residuals, yearly_T) | ||
lmbda = lambda_function(pt.coeffs_, yearly_T) | ||
transformed = pt._yeo_johnson_transform(local_monthly_residuals, lmbda) | ||
|
||
assert (lmbda >= bounds[0]).all() & (lmbda <= bounds[1]).all() | ||
np.testing.assert_allclose(sp.stats.skew(transformed), 0, atol=0.1) | ||
|
||
|
||
def test_transform(): | ||
# NOTE: testing trivial transform with lambda = 1 | ||
n_ts = 20 | ||
n_gridcells = 10 | ||
|
||
pt = PowerTransformerVariableLambda(standardize=False) | ||
pt.coeffs_ = np.tile([1, 0], (n_gridcells, 1)) | ||
|
||
monthly_residuals = np.ones((n_ts, n_gridcells)) | ||
yearly_T = np.zeros((n_ts, n_gridcells)) | ||
|
||
result = pt.transform(monthly_residuals, yearly_T) | ||
expected = np.ones((n_ts, n_gridcells)) | ||
|
||
np.testing.assert_equal(result, expected) | ||
|
||
|
||
def test_yeo_johnson_transform(): | ||
pt = PowerTransformerVariableLambda(standardize=False) | ||
|
||
# test all possible combinations of local_monthly_residuals and lambdas | ||
local_monthly_residuals = np.array([0.0, 1.0, 0.0, 1.0, -1.0, -1.0]) | ||
lambdas = np.array([1.0, 1.0, 0.0, 0.0, 1.0, 2.0]) | ||
|
||
result = pt._yeo_johnson_transform(local_monthly_residuals, lambdas) | ||
expected = np.array([0.0, 1.0, 0.0, np.log1p(1.0), -1.0, -np.log1p(1.0)]) | ||
|
||
np.testing.assert_equal(result, expected) | ||
|
||
|
||
def test_transform_roundtrip(): | ||
n_ts = 20 | ||
n_gridcells = 5 | ||
# dummy seasonal cylce, having negative and positive values | ||
monthly_residuals = np.sin(np.linspace(0, 2 * np.pi, n_ts * n_gridcells)).reshape( | ||
n_ts, n_gridcells | ||
) | ||
yearly_T = np.zeros((n_ts, n_gridcells)) | ||
|
||
pt = PowerTransformerVariableLambda(standardize=False) | ||
# dummy lambdas (since yearly_T is zero lambda comes out to be second coefficient) | ||
# we have all cases for lambdas 0 and 2 (special cases), 1 (identity case) | ||
# lambda between 0 and 1 and lambda between 1 and 2 for concave and convex cases | ||
pt.coeffs_ = np.array([[0, 0], [0, 1], [0, 2], [0, 0.5], [0, 1.5]]) | ||
pt.mins_ = np.amin(monthly_residuals, axis=0) | ||
pt.maxs_ = np.amax(monthly_residuals, axis=0) | ||
|
||
transformed = pt.transform(monthly_residuals, yearly_T) | ||
result = pt.inverse_transform(transformed, yearly_T) | ||
|
||
np.testing.assert_allclose(result, monthly_residuals, atol=1e-7) | ||
|
||
|
||
def test_standard_scaler(): | ||
# generate random data with mean different from 0 and std different from one | ||
np.random.seed(0) | ||
n_ts = 100_000 | ||
n_gridcells = 1 | ||
monthly_residuals = np.random.randn(n_ts, n_gridcells) * 10 + 5 | ||
yearly_T = np.zeros((n_ts, n_gridcells)) | ||
|
||
# fit the transformer | ||
pt = PowerTransformerVariableLambda(standardize=True) | ||
pt.fit(monthly_residuals, yearly_T, n_gridcells) | ||
transformed = pt.transform(monthly_residuals, yearly_T) | ||
|
||
# the transformed data should have mean close to 0 and std close to 1 | ||
np.testing.assert_allclose(np.mean(transformed), 0, atol=1e-2) | ||
np.testing.assert_allclose(np.std(transformed), 1, atol=1e-2) | ||
|
||
# inverse transform should give back the original data | ||
result = pt.inverse_transform(transformed, yearly_T) | ||
np.testing.assert_allclose(result, monthly_residuals, atol=1e-7) |