From 6eada7287e7e1a6098553714cc9a6a06d7b34eef Mon Sep 17 00:00:00 2001 From: pcuestas Date: Sat, 29 Jul 2023 12:05:16 +0200 Subject: [PATCH 01/29] STD for FDataBasis and FDataGrid --- skfda/exploratory/stats/__init__.py | 2 + skfda/exploratory/stats/_stats.py | 63 ++++++++++++++++++++++++++++- skfda/misc/_math.py | 30 ++++++++++++++ 3 files changed, 93 insertions(+), 2 deletions(-) diff --git a/skfda/exploratory/stats/__init__.py b/skfda/exploratory/stats/__init__.py index 0a1f3a6de..345eb30ac 100644 --- a/skfda/exploratory/stats/__init__.py +++ b/skfda/exploratory/stats/__init__.py @@ -19,6 +19,7 @@ "gmean", "mean", "modified_epigraph_index", + "std", "trim_mean", "var", ], @@ -37,6 +38,7 @@ gmean as gmean, mean as mean, modified_epigraph_index as modified_epigraph_index, + std as std, trim_mean as trim_mean, var as var, ) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 20d2443e2..49fb1adbc 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -5,11 +5,12 @@ from typing import Callable, TypeVar, Union import numpy as np +import functools from scipy import integrate from scipy.stats import rankdata from ...misc.metrics._lp_distances import l2_distance -from ...representation import FData, FDataGrid +from ...representation import FData, FDataGrid, FDataBasis from ...typing._metric import Metric from ...typing._numpy import NDArrayFloat from ..depth import Depth, ModifiedBandDepth @@ -76,7 +77,7 @@ def gmean(X: FDataGrid) -> FDataGrid: def cov( X: FData, - ddof: int = 1 + ddof: int = 1, ) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]: """ Compute the covariance. @@ -99,6 +100,64 @@ def cov( return X.cov(ddof=ddof) +@functools.singledispatch +def std(X: F, ddof: int = 1) -> F: + r""" + Compute the standard deviation of all the samples in a FData object. + + .. 
math:: + \text{std}_X(t) = \sqrt{\frac{1}{N-\text{ddof}} + \sum_{n=1}^{N}{\left(X_n(t) - \overline{X}(t)\right)^2}} + + Args: + X: Object containing all the samples whose standard deviation is + wanted. + ddof: Means "Delta Degrees of Freedom". The divisor used in + calculations is `N - ddof`, where `N` represents the number of + samples in `X`. By default ddof is 1. + + Returns: + Standard deviation of all the samples in the original object, as a + :term:`functional data object` with just one sample. + + """ + raise NotImplementedError("Not implemented for this type") + + +@std.register(FDataGrid) +def std_fdatagrid(X: FDataGrid, ddof: int = 1) -> FDataGrid: + return X.copy( + data_matrix=np.std(X.data_matrix, axis=0, ddof=ddof)[np.newaxis, ...], + sample_names=("standard deviation",), + ) + + +@std.register(FDataBasis) +def std_fdatabasis(X: FDataBasis, ddof: int = 1) -> FDataBasis: + from ...misc._math import functional_data_object_to_basis + + if X.dim_domain != 1 or X.dim_codomain != 1: + raise NotImplementedError( + "Standard deviation only implemented " + "for univariate functions." + ) + + basis = X.basis + coeff_matrix = np.cov(X.coefficients, rowvar=False, ddof=ddof) + + def std_function(t_points: NDArrayFloat) -> NDArrayFloat: + assert len(t_points) == 1, ( + "Standard deviation function only implemented for " + "one-point-at-a-time evaluations." + ) + basis_evaluation = basis(t_points).reshape((-1, 1)) + return np.sqrt( + basis_evaluation.T @ coeff_matrix @ basis_evaluation + ).reshape((1, -1, 1)) + + return functional_data_object_to_basis(f=std_function, new_basis=X.basis) + + def modified_epigraph_index(X: FDataGrid) -> NDArrayFloat: """ Calculate the Modified Epigraph Index of a FDataGrid. 
diff --git a/skfda/misc/_math.py b/skfda/misc/_math.py index 4e02de2f8..52fc20e7b 100644 --- a/skfda/misc/_math.py +++ b/skfda/misc/_math.py @@ -677,3 +677,33 @@ def cosine_similarity_matrix( return _clip_cosine( inner_matrix / norm1[:, np.newaxis] / norm2[np.newaxis, :], ) + + +def functional_data_object_to_basis( + f: Callable[[NDArrayFloat], NDArrayFloat] | NDArrayFloat, + new_basis: Basis, +) -> FDataBasis: + """Express a math function as a FDataBasis with a given basis. + + Args: + f: math function. + new_basis: the basis of the output. + + Returns: + FDataBasis: FDataBasis with calculated coefficients and the new + basis. + """ + if isinstance(f, FDataBasis) and f.basis == new_basis: + return f + + inner_prod = inner_product_matrix( + new_basis, + f, + _domain_range=new_basis.domain_range, + ) + + gram_matrix = new_basis.gram_matrix() + + coefs = np.linalg.solve(gram_matrix, inner_prod) + + return FDataBasis(new_basis, coefs.T) From 036028a23e957301c659aac3c6a236d5a4e29da9 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Sat, 29 Jul 2023 13:06:24 +0200 Subject: [PATCH 02/29] A test for std(FData). 
--- skfda/tests/test_stats.py | 67 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/skfda/tests/test_stats.py b/skfda/tests/test_stats.py index 22b74a437..fb10ad025 100644 --- a/skfda/tests/test_stats.py +++ b/skfda/tests/test_stats.py @@ -2,8 +2,17 @@ import numpy as np -from skfda.datasets import fetch_phoneme, fetch_tecator, fetch_weather -from skfda.exploratory.stats import geometric_median, modified_epigraph_index +from skfda.datasets import ( + fetch_phoneme, + fetch_tecator, + fetch_weather, + make_gaussian_process, +) +from skfda.exploratory.stats import ( + geometric_median, modified_epigraph_index, std +) +from skfda.misc.covariances import Gaussian +from skfda.representation.basis import FourierBasis class TestGeometricMedian(unittest.TestCase): @@ -144,3 +153,57 @@ def test_mei(self) -> None: ]), rtol=1e-5, ) + + +class TestStd(unittest.TestCase): + """Test the standard deviation of fuctional data objects.""" + + def _test_std_gaussian_fourier(self, n_basis: int) -> None: + """ + Test standard deviation using + a gaussian processes and a Fourier basis. + """ + start = 0 + stop = 1 + n_features = 1000 + + gaussian_process = make_gaussian_process( + start=start, + stop=stop, + n_samples=100, + n_features=n_features, + mean=0.0, + cov=Gaussian(variance=1, length_scale=0.1), + random_state=0, + ) + fourier_basis = FourierBasis(n_basis=n_basis, domain_range=(0, 1)) + fd = gaussian_process.to_basis(fourier_basis) + + std_fd = std(fd) + grid = np.linspace(start, stop, n_features) + almost_std_fd = std(fd.to_grid(grid)).to_basis(fourier_basis) + + """ + when measuring the closeness between std(fd) and the "expected" value, + we are a bit more lenient in the extremes of the domain; as the way of + projecting the std to the basis is different in each case. 
+ """ + inner_grid_limit = n_features // 10 + inner_grid = grid[inner_grid_limit:-inner_grid_limit] + np.testing.assert_allclose( + std_fd(inner_grid), + almost_std_fd(inner_grid), + rtol=1e-3, + ) + + outer_grid = grid[:inner_grid_limit] + grid[-inner_grid_limit:] + np.testing.assert_allclose( + std_fd(outer_grid), + almost_std_fd(outer_grid), + rtol=1e-2, + ) + + def test_std(self) -> None: + """Test standard deviation.""" + + self._test_std_gaussian_fourier(61) From b2f251a76fb9c4361b292860593a4a07459b0a0f Mon Sep 17 00:00:00 2001 From: pcuestas Date: Sun, 30 Jul 2023 12:58:30 +0200 Subject: [PATCH 03/29] FIX typing issues in function_to_fdatabasis --- skfda/exploratory/stats/_stats.py | 4 ++-- skfda/misc/_math.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 49fb1adbc..3f52a28cf 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -134,7 +134,7 @@ def std_fdatagrid(X: FDataGrid, ddof: int = 1) -> FDataGrid: @std.register(FDataBasis) def std_fdatabasis(X: FDataBasis, ddof: int = 1) -> FDataBasis: - from ...misc._math import functional_data_object_to_basis + from ...misc._math import function_to_fdatabasis if X.dim_domain != 1 or X.dim_codomain != 1: raise NotImplementedError( @@ -155,7 +155,7 @@ def std_function(t_points: NDArrayFloat) -> NDArrayFloat: basis_evaluation.T @ coeff_matrix @ basis_evaluation ).reshape((1, -1, 1)) - return functional_data_object_to_basis(f=std_function, new_basis=X.basis) + return function_to_fdatabasis(f=std_function, new_basis=X.basis) def modified_epigraph_index(X: FDataGrid) -> NDArrayFloat: diff --git a/skfda/misc/_math.py b/skfda/misc/_math.py index 52fc20e7b..595c9361d 100644 --- a/skfda/misc/_math.py +++ b/skfda/misc/_math.py @@ -679,8 +679,8 @@ def cosine_similarity_matrix( ) -def functional_data_object_to_basis( - f: Callable[[NDArrayFloat], NDArrayFloat] | NDArrayFloat, +def 
function_to_fdatabasis( + f: Callable[[NDArrayFloat], NDArrayFloat], new_basis: Basis, ) -> FDataBasis: """Express a math function as a FDataBasis with a given basis. From f5d59a95ec42f3f06295fbe96cf13d9b6c4dbec9 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 17 Aug 2023 19:17:04 +0200 Subject: [PATCH 04/29] Fix style issues. --- skfda/exploratory/stats/_stats.py | 8 ++++---- skfda/tests/test_stats.py | 13 ++++--------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 3f52a28cf..ee8557beb 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -4,13 +4,13 @@ from builtins import isinstance from typing import Callable, TypeVar, Union -import numpy as np import functools +import numpy as np from scipy import integrate from scipy.stats import rankdata from ...misc.metrics._lp_distances import l2_distance -from ...representation import FData, FDataGrid, FDataBasis +from ...representation import FData, FDataBasis, FDataGrid from ...typing._metric import Metric from ...typing._numpy import NDArrayFloat from ..depth import Depth, ModifiedBandDepth @@ -139,7 +139,7 @@ def std_fdatabasis(X: FDataBasis, ddof: int = 1) -> FDataBasis: if X.dim_domain != 1 or X.dim_codomain != 1: raise NotImplementedError( "Standard deviation only implemented " - "for univariate functions." 
+ "for univariate functions.", ) basis = X.basis @@ -152,7 +152,7 @@ def std_function(t_points: NDArrayFloat) -> NDArrayFloat: ) basis_evaluation = basis(t_points).reshape((-1, 1)) return np.sqrt( - basis_evaluation.T @ coeff_matrix @ basis_evaluation + basis_evaluation.T @ coeff_matrix @ basis_evaluation, ).reshape((1, -1, 1)) return function_to_fdatabasis(f=std_function, new_basis=X.basis) diff --git a/skfda/tests/test_stats.py b/skfda/tests/test_stats.py index fb10ad025..15b11b467 100644 --- a/skfda/tests/test_stats.py +++ b/skfda/tests/test_stats.py @@ -9,7 +9,9 @@ make_gaussian_process, ) from skfda.exploratory.stats import ( - geometric_median, modified_epigraph_index, std + geometric_median, + modified_epigraph_index, + std, ) from skfda.misc.covariances import Gaussian from skfda.representation.basis import FourierBasis @@ -160,8 +162,7 @@ class TestStd(unittest.TestCase): def _test_std_gaussian_fourier(self, n_basis: int) -> None: """ - Test standard deviation using - a gaussian processes and a Fourier basis. + Test standard deviation using a gaussian processes and a Fourier basis. """ start = 0 stop = 1 @@ -183,11 +184,6 @@ def _test_std_gaussian_fourier(self, n_basis: int) -> None: grid = np.linspace(start, stop, n_features) almost_std_fd = std(fd.to_grid(grid)).to_basis(fourier_basis) - """ - when measuring the closeness between std(fd) and the "expected" value, - we are a bit more lenient in the extremes of the domain; as the way of - projecting the std to the basis is different in each case. - """ inner_grid_limit = n_features // 10 inner_grid = grid[inner_grid_limit:-inner_grid_limit] np.testing.assert_allclose( @@ -205,5 +201,4 @@ def _test_std_gaussian_fourier(self, n_basis: int) -> None: def test_std(self) -> None: """Test standard deviation.""" - self._test_std_gaussian_fourier(61) From 3e16d9055dfb4a33c2fd2207ce1d729cf1b544d8 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 17 Aug 2023 19:20:19 +0200 Subject: [PATCH 05/29] Fix style issues. 
--- skfda/tests/test_stats.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/skfda/tests/test_stats.py b/skfda/tests/test_stats.py index 15b11b467..ebad595c5 100644 --- a/skfda/tests/test_stats.py +++ b/skfda/tests/test_stats.py @@ -161,9 +161,7 @@ class TestStd(unittest.TestCase): """Test the standard deviation of fuctional data objects.""" def _test_std_gaussian_fourier(self, n_basis: int) -> None: - """ - Test standard deviation using a gaussian processes and a Fourier basis. - """ + """Test standard deviation: gaussian processes and a Fourier basis.""" start = 0 stop = 1 n_features = 1000 From 5f3631be42f009613b1649411e9a9bd8b12a7eb9 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Fri, 25 Aug 2023 16:22:39 +0200 Subject: [PATCH 06/29] Fix some issues outlined in the pull request: * function_to_fdatabasis to _utils * docstring in each std function * use pytest instead of unittest --- skfda/_utils/__init__.py | 1 + skfda/_utils/_utils.py | 36 +++++++++++++- skfda/exploratory/stats/_stats.py | 4 +- skfda/misc/_math.py | 30 ----------- skfda/tests/test_stats.py | 49 ------------------ skfda/tests/test_stats_std.py | 83 +++++++++++++++++++++++++++++++ 6 files changed, 121 insertions(+), 82 deletions(-) create mode 100644 skfda/tests/test_stats_std.py diff --git a/skfda/_utils/__init__.py b/skfda/_utils/__init__.py index 387f22b58..838328a6c 100644 --- a/skfda/_utils/__init__.py +++ b/skfda/_utils/__init__.py @@ -20,6 +20,7 @@ "_same_domain", "_to_grid", "_to_grid_points", + "function_to_fdatabasis", "nquad_vec", ], '_warping': [ diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index 357591baa..653af9a34 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -4,7 +4,6 @@ import functools import numbers -from functools import singledispatch from typing import ( TYPE_CHECKING, Any, @@ -36,7 +35,7 @@ ArrayDTypeT = TypeVar("ArrayDTypeT", bound="np.generic") if TYPE_CHECKING: - from ..representation import FData, FDataGrid + 
from ..representation import FData, FDataBasis, FDataGrid from ..representation.basis import Basis from ..representation.extrapolation import ExtrapolationLike @@ -609,3 +608,36 @@ def _classifier_get_classes( f'one; got {classes.size} class', ) return classes, y_ind + + +def function_to_fdatabasis( + f: Callable[[NDArrayFloat], NDArrayFloat], + new_basis: Basis, +) -> FDataBasis: + """Express a math function as a FDataBasis with a given basis. + + Args: + f: math function. + new_basis: the basis of the output. + + Returns: + FDataBasis: FDataBasis with calculated coefficients and the new + basis. + """ + from .. import FDataBasis + from ..misc._math import inner_product_matrix + + if isinstance(f, FDataBasis) and f.basis == new_basis: + return f + + inner_prod = inner_product_matrix( + new_basis, + f, + _domain_range=new_basis.domain_range, + ) + + gram_matrix = new_basis.gram_matrix() + + coefs = np.linalg.solve(gram_matrix, inner_prod) + + return FDataBasis(new_basis, coefs.T) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index ee8557beb..c084b9354 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -126,6 +126,7 @@ def std(X: F, ddof: int = 1) -> F: @std.register(FDataGrid) def std_fdatagrid(X: FDataGrid, ddof: int = 1) -> FDataGrid: + """Standard deviation of a FDataGrid.""" return X.copy( data_matrix=np.std(X.data_matrix, axis=0, ddof=ddof)[np.newaxis, ...], sample_names=("standard deviation",), @@ -134,7 +135,8 @@ def std_fdatagrid(X: FDataGrid, ddof: int = 1) -> FDataGrid: @std.register(FDataBasis) def std_fdatabasis(X: FDataBasis, ddof: int = 1) -> FDataBasis: - from ...misc._math import function_to_fdatabasis + """Standard deviation of a FDataBasis.""" + from ..._utils import function_to_fdatabasis if X.dim_domain != 1 or X.dim_codomain != 1: raise NotImplementedError( diff --git a/skfda/misc/_math.py b/skfda/misc/_math.py index 595c9361d..4e02de2f8 100644 --- a/skfda/misc/_math.py 
+++ b/skfda/misc/_math.py @@ -677,33 +677,3 @@ def cosine_similarity_matrix( return _clip_cosine( inner_matrix / norm1[:, np.newaxis] / norm2[np.newaxis, :], ) - - -def function_to_fdatabasis( - f: Callable[[NDArrayFloat], NDArrayFloat], - new_basis: Basis, -) -> FDataBasis: - """Express a math function as a FDataBasis with a given basis. - - Args: - f: math function. - new_basis: the basis of the output. - - Returns: - FDataBasis: FDataBasis with calculated coefficients and the new - basis. - """ - if isinstance(f, FDataBasis) and f.basis == new_basis: - return f - - inner_prod = inner_product_matrix( - new_basis, - f, - _domain_range=new_basis.domain_range, - ) - - gram_matrix = new_basis.gram_matrix() - - coefs = np.linalg.solve(gram_matrix, inner_prod) - - return FDataBasis(new_basis, coefs.T) diff --git a/skfda/tests/test_stats.py b/skfda/tests/test_stats.py index ebad595c5..79f96ea8e 100644 --- a/skfda/tests/test_stats.py +++ b/skfda/tests/test_stats.py @@ -6,15 +6,11 @@ fetch_phoneme, fetch_tecator, fetch_weather, - make_gaussian_process, ) from skfda.exploratory.stats import ( geometric_median, modified_epigraph_index, - std, ) -from skfda.misc.covariances import Gaussian -from skfda.representation.basis import FourierBasis class TestGeometricMedian(unittest.TestCase): @@ -155,48 +151,3 @@ def test_mei(self) -> None: ]), rtol=1e-5, ) - - -class TestStd(unittest.TestCase): - """Test the standard deviation of fuctional data objects.""" - - def _test_std_gaussian_fourier(self, n_basis: int) -> None: - """Test standard deviation: gaussian processes and a Fourier basis.""" - start = 0 - stop = 1 - n_features = 1000 - - gaussian_process = make_gaussian_process( - start=start, - stop=stop, - n_samples=100, - n_features=n_features, - mean=0.0, - cov=Gaussian(variance=1, length_scale=0.1), - random_state=0, - ) - fourier_basis = FourierBasis(n_basis=n_basis, domain_range=(0, 1)) - fd = gaussian_process.to_basis(fourier_basis) - - std_fd = std(fd) - grid = 
np.linspace(start, stop, n_features) - almost_std_fd = std(fd.to_grid(grid)).to_basis(fourier_basis) - - inner_grid_limit = n_features // 10 - inner_grid = grid[inner_grid_limit:-inner_grid_limit] - np.testing.assert_allclose( - std_fd(inner_grid), - almost_std_fd(inner_grid), - rtol=1e-3, - ) - - outer_grid = grid[:inner_grid_limit] + grid[-inner_grid_limit:] - np.testing.assert_allclose( - std_fd(outer_grid), - almost_std_fd(outer_grid), - rtol=1e-2, - ) - - def test_std(self) -> None: - """Test standard deviation.""" - self._test_std_gaussian_fourier(61) diff --git a/skfda/tests/test_stats_std.py b/skfda/tests/test_stats_std.py new file mode 100644 index 000000000..5b487f730 --- /dev/null +++ b/skfda/tests/test_stats_std.py @@ -0,0 +1,83 @@ +"""Test stats functions.""" + +from __future__ import annotations + +from typing import Any + +import numpy as np +import pytest + +from skfda import FDataGrid +from skfda.datasets import make_gaussian_process +from skfda.exploratory.stats import std +from skfda.misc.covariances import Gaussian +from skfda.representation.basis import FourierBasis + + +@pytest.fixture(params=[61, 71]) +def n_basis(request: Any) -> int: + """Fixture for n_basis to test.""" + return request.param + + +@pytest.fixture +def start() -> int: + """Fixture for the infimum of the domain.""" + return 0 + + +@pytest.fixture +def stop() -> int: + """Fixture for the supremum of the domain.""" + return 1 + + +@pytest.fixture +def n_features() -> int: + """Fixture for the number of features.""" + return 1000 + + +@pytest.fixture +def gaussian_process(start: int, stop: int, n_features: int) -> FDataGrid: + """Fixture for a Gaussian process.""" + return make_gaussian_process( + start=start, + stop=stop, + n_samples=100, + n_features=n_features, + mean=0.0, + cov=Gaussian(variance=1, length_scale=0.1), + random_state=0, + ) + + +def test_std_gaussian_fourier( + start: int, + stop: int, + n_features: int, + n_basis: int, + gaussian_process: FDataGrid, +) -> 
None: + """Test standard deviation: Gaussian processes and a Fourier basis.""" + fourier_basis = FourierBasis(n_basis=n_basis, domain_range=(0, 1)) + fd = gaussian_process.to_basis(fourier_basis) + + std_fd = std(fd) + grid = np.linspace(start, stop, n_features) + almost_std_fd = std(fd.to_grid(grid)).to_basis(fourier_basis) + + inner_grid_limit = n_features // 10 + inner_grid = grid[inner_grid_limit:-inner_grid_limit] + np.testing.assert_allclose( + std_fd(inner_grid), + almost_std_fd(inner_grid), + rtol=1e-3, + ) + + outer_grid = grid[:inner_grid_limit] + grid[-inner_grid_limit:] + np.testing.assert_allclose( + std_fd(outer_grid), + almost_std_fd(outer_grid), + rtol=1e-2, + ) From 4c2757e19d4c868fce1233fe43c0af30ee6f4741 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Fri, 25 Aug 2023 17:24:58 +0200 Subject: [PATCH 07/29] Test for std_fdatagrid. --- skfda/tests/test_stats_std.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/skfda/tests/test_stats_std.py b/skfda/tests/test_stats_std.py index 5b487f730..a2162f6cb 100644 --- a/skfda/tests/test_stats_std.py +++ b/skfda/tests/test_stats_std.py @@ -8,6 +8,7 @@ import pytest from skfda import FDataGrid +from skfda.typing._numpy import NDArrayFloat from skfda.datasets import make_gaussian_process from skfda.exploratory.stats import std from skfda.misc.covariances import Gaussian @@ -81,3 +82,29 @@ def test_std_gaussian_fourier( almost_std_fd(outer_grid), rtol=1e-2, ) + + +@pytest.mark.parametrize("fdatagrid, expected_std_data_matrix", [ + ( + FDataGrid( + data_matrix=[ + [[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]], + [[2, 3, 4, 5, 6, 7], [-2, -3, -4, -5, -6, -7]], + ], + grid_points=[ + [-2, -1], + [0, 1, 2, 3, 4, 5] + ], + ), + np.full((1, 2, 6, 1), np.sqrt(2)) + ), +]) +def test_std_fdatagrid( + fdatagrid: FDataGrid, + expected_std_data_matrix: NDArrayFloat, +) -> None: + """Test some FDataGrids' stds.""" + np.testing.assert_allclose( + std(fdatagrid).data_matrix, + 
expected_std_data_matrix + ) From 23b2d4e731bd5f23aacfd09466a11dce70361a24 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Fri, 25 Aug 2023 17:27:44 +0200 Subject: [PATCH 08/29] Fix style. --- skfda/tests/test_stats_std.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/tests/test_stats_std.py b/skfda/tests/test_stats_std.py index a2162f6cb..d350e97c6 100644 --- a/skfda/tests/test_stats_std.py +++ b/skfda/tests/test_stats_std.py @@ -106,5 +106,5 @@ def test_std_fdatagrid( """Test some FDataGrids' stds.""" np.testing.assert_allclose( std(fdatagrid).data_matrix, - expected_std_data_matrix + expected_std_data_matrix, ) From 7137aba096d187870fbba5b3b8b8e23e252dcd9d Mon Sep 17 00:00:00 2001 From: pcuestas Date: Sat, 26 Aug 2023 07:57:05 +0200 Subject: [PATCH 09/29] Test for std_fdatagrid. --- skfda/tests/test_stats_std.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/skfda/tests/test_stats_std.py b/skfda/tests/test_stats_std.py index d350e97c6..a6908669c 100644 --- a/skfda/tests/test_stats_std.py +++ b/skfda/tests/test_stats_std.py @@ -96,7 +96,26 @@ def test_std_gaussian_fourier( [0, 1, 2, 3, 4, 5] ], ), - np.full((1, 2, 6, 1), np.sqrt(2)) + np.full((1, 2, 6, 1), np.sqrt(2)), + ), + ( + FDataGrid( + data_matrix=[ + [ + [[10, 11], [10, 12], [11, 14]], + [[15, 16], [12, 15], [20, 13]], + ], + [ + [[11, 12], [11, 13], [12, 13]], + [[14, 15], [11, 16], [21, 12]], + ], + ], + grid_points=[ + [0, 1], + [0, 1, 2] + ], + ), + np.full((1, 2, 3, 2), np.sqrt(1/2)), ), ]) def test_std_fdatagrid( From 71f7071b0a928de9083640f95c5f685d9bb71b0e Mon Sep 17 00:00:00 2001 From: pcuestas Date: Sat, 26 Aug 2023 09:00:05 +0200 Subject: [PATCH 10/29] More than 1 evaluation at a time in std_fdatabasis --- skfda/exploratory/stats/_stats.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index c084b9354..c119e2ccc 100644 --- 
a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -145,17 +145,14 @@ def std_fdatabasis(X: FDataBasis, ddof: int = 1) -> FDataBasis: ) basis = X.basis - coeff_matrix = np.cov(X.coefficients, rowvar=False, ddof=ddof) + coeff_cov_matrix = np.cov(X.coefficients, rowvar=False, ddof=ddof) \ + .reshape((basis.n_basis, basis.n_basis)) def std_function(t_points: NDArrayFloat) -> NDArrayFloat: - assert len(t_points) == 1, ( - "Standard deviation function only implemented for " - "one-point-at-a-time evaluations." - ) - basis_evaluation = basis(t_points).reshape((-1, 1)) + basis_evaluation = basis(t_points).reshape((basis.n_basis, -1)) return np.sqrt( - basis_evaluation.T @ coeff_matrix @ basis_evaluation, - ).reshape((1, -1, 1)) + basis_evaluation.T @ coeff_cov_matrix @ basis_evaluation, + ).reshape((1, -1, X.dim_codomain)) return function_to_fdatabasis(f=std_function, new_basis=X.basis) From 614aac5726fb37bebce066e41896d51b6bc3bb8b Mon Sep 17 00:00:00 2001 From: pcuestas Date: Sun, 27 Aug 2023 21:17:41 +0200 Subject: [PATCH 11/29] Fix test_stats_std style issues. 
--- skfda/tests/test_stats_std.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/skfda/tests/test_stats_std.py b/skfda/tests/test_stats_std.py index a6908669c..ef1017c42 100644 --- a/skfda/tests/test_stats_std.py +++ b/skfda/tests/test_stats_std.py @@ -93,7 +93,7 @@ def test_std_gaussian_fourier( ], grid_points=[ [-2, -1], - [0, 1, 2, 3, 4, 5] + [0, 1, 2, 3, 4, 5], ], ), np.full((1, 2, 6, 1), np.sqrt(2)), @@ -112,17 +112,17 @@ def test_std_gaussian_fourier( ], grid_points=[ [0, 1], - [0, 1, 2] + [0, 1, 2], ], ), - np.full((1, 2, 3, 2), np.sqrt(1/2)), + np.full((1, 2, 3, 2), np.sqrt(1 / 2)), ), ]) def test_std_fdatagrid( fdatagrid: FDataGrid, expected_std_data_matrix: NDArrayFloat, ) -> None: - """Test some FDataGrids' stds.""" + """Test some std_fdatagrid cases.""" np.testing.assert_allclose( std(fdatagrid).data_matrix, expected_std_data_matrix, From 7a8ece04f7bcc929eba3445d129abe4d7a54daf4 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Mon, 28 Aug 2023 18:11:38 +0200 Subject: [PATCH 12/29] Exact std function improved to accept more than one input and to return multivalued (vector) outputs. Fix some pep8 issues. 
--- skfda/exploratory/stats/_stats.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index c119e2ccc..9228c74d0 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -126,7 +126,7 @@ def std(X: F, ddof: int = 1) -> F: @std.register(FDataGrid) def std_fdatagrid(X: FDataGrid, ddof: int = 1) -> FDataGrid: - """Standard deviation of a FDataGrid.""" + """Compute the standard deviation of a FDataGrid.""" return X.copy( data_matrix=np.std(X.data_matrix, axis=0, ddof=ddof)[np.newaxis, ...], sample_names=("standard deviation",), @@ -135,23 +135,23 @@ def std_fdatagrid(X: FDataGrid, ddof: int = 1) -> FDataGrid: @std.register(FDataBasis) def std_fdatabasis(X: FDataBasis, ddof: int = 1) -> FDataBasis: - """Standard deviation of a FDataBasis.""" + """Compute the standard deviation of a FDataBasis.""" from ..._utils import function_to_fdatabasis if X.dim_domain != 1 or X.dim_codomain != 1: raise NotImplementedError( - "Standard deviation only implemented " - "for univariate functions.", + "Standard deviation only implemented for univariate functions.", ) basis = X.basis - coeff_cov_matrix = np.cov(X.coefficients, rowvar=False, ddof=ddof) \ - .reshape((basis.n_basis, basis.n_basis)) + coeff_cov_matrix = np.cov( + X.coefficients, rowvar=False, ddof=ddof, + ).reshape((basis.n_basis, basis.n_basis)) def std_function(t_points: NDArrayFloat) -> NDArrayFloat: basis_evaluation = basis(t_points).reshape((basis.n_basis, -1)) return np.sqrt( - basis_evaluation.T @ coeff_cov_matrix @ basis_evaluation, + np.diag(basis_evaluation.T @ coeff_cov_matrix @ basis_evaluation), ).reshape((1, -1, X.dim_codomain)) return function_to_fdatabasis(f=std_function, new_basis=X.basis) From eb80f31013dd15a7de2993374853fb681f1ba8c8 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Mon, 4 Sep 2023 19:19:01 +0200 Subject: [PATCH 13/29] Undo test_stats changes --- 
skfda/tests/test_stats.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/skfda/tests/test_stats.py b/skfda/tests/test_stats.py index 79f96ea8e..22b74a437 100644 --- a/skfda/tests/test_stats.py +++ b/skfda/tests/test_stats.py @@ -2,15 +2,8 @@ import numpy as np -from skfda.datasets import ( - fetch_phoneme, - fetch_tecator, - fetch_weather, -) -from skfda.exploratory.stats import ( - geometric_median, - modified_epigraph_index, -) +from skfda.datasets import fetch_phoneme, fetch_tecator, fetch_weather +from skfda.exploratory.stats import geometric_median, modified_epigraph_index class TestGeometricMedian(unittest.TestCase): From 32233c98bc5f0ca949b46b7e4f72e04fed84ff78 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Mon, 4 Sep 2023 19:39:50 +0200 Subject: [PATCH 14/29] Import order --- skfda/tests/test_stats_std.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/tests/test_stats_std.py b/skfda/tests/test_stats_std.py index ef1017c42..86d818a9d 100644 --- a/skfda/tests/test_stats_std.py +++ b/skfda/tests/test_stats_std.py @@ -8,11 +8,11 @@ import pytest from skfda import FDataGrid -from skfda.typing._numpy import NDArrayFloat from skfda.datasets import make_gaussian_process from skfda.exploratory.stats import std from skfda.misc.covariances import Gaussian from skfda.representation.basis import FourierBasis +from skfda.typing._numpy import NDArrayFloat @pytest.fixture(params=[61, 71]) From 026d4883999f524ea9b339df00888cec19642f0a Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 14 Sep 2023 13:52:39 +0200 Subject: [PATCH 15/29] std_fdatabasis with dim>1 (domain or/and codomain) + tests --- skfda/exploratory/stats/_stats.py | 5 -- skfda/tests/test_stats_std.py | 82 ++++++++++++++++++++++++++++++- 2 files changed, 80 insertions(+), 7 deletions(-) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 9228c74d0..4cf784ef3 100644 --- a/skfda/exploratory/stats/_stats.py +++ 
b/skfda/exploratory/stats/_stats.py @@ -138,11 +138,6 @@ def std_fdatabasis(X: FDataBasis, ddof: int = 1) -> FDataBasis: """Compute the standard deviation of a FDataBasis.""" from ..._utils import function_to_fdatabasis - if X.dim_domain != 1 or X.dim_codomain != 1: - raise NotImplementedError( - "Standard deviation only implemented for univariate functions.", - ) - basis = X.basis coeff_cov_matrix = np.cov( X.coefficients, rowvar=False, ddof=ddof, diff --git a/skfda/tests/test_stats_std.py b/skfda/tests/test_stats_std.py index 86d818a9d..a37196a31 100644 --- a/skfda/tests/test_stats_std.py +++ b/skfda/tests/test_stats_std.py @@ -7,11 +7,16 @@ import numpy as np import pytest -from skfda import FDataGrid +from skfda import FDataBasis, FDataGrid from skfda.datasets import make_gaussian_process from skfda.exploratory.stats import std from skfda.misc.covariances import Gaussian -from skfda.representation.basis import FourierBasis +from skfda.representation.basis import ( + FourierBasis, + MonomialBasis, + TensorBasis, + VectorValuedBasis, +) from skfda.typing._numpy import NDArrayFloat @@ -127,3 +132,76 @@ def test_std_fdatagrid( std(fdatagrid).data_matrix, expected_std_data_matrix, ) + + +@pytest.mark.parametrize("fdatabasis, expected_std_coefficients", [ + ( + FDataBasis( + basis=VectorValuedBasis([ + MonomialBasis(domain_range=(0, 1), n_basis=3), + MonomialBasis(domain_range=(0, 1), n_basis=3), + ]), + coefficients=[ + [0, 0, 0, 0, 0, 0], + [1, 0, 0, 1, 0, 0], + ], + ), + np.array([[np.sqrt(1 / 2), 0, 0, np.sqrt(1 / 2), 0, 0]]), + ), + ( + FDataBasis( + basis=VectorValuedBasis([ + FourierBasis(domain_range=(0, 1), n_basis=5), + MonomialBasis(domain_range=(0, 1), n_basis=4), + ]), + coefficients=[ + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 0, 0, 0, 0, 1, 0, 0, 0], + ], + ), + np.array([[np.sqrt(1 / 2), 0, 0, 0, 0, np.sqrt(1 / 2), 0, 0, 0]]), + ), + ( + FDataBasis( + basis=TensorBasis([ + MonomialBasis(domain_range=(0, 1), n_basis=4), + MonomialBasis(domain_range=(0, 1), 
n_basis=4), + ]), + coefficients=[ + np.zeros(16), + np.pad([1], (0, 15)), + ], + ), + [np.pad([np.sqrt(1 / 2)], (0, 15))], + ), + ( + FDataBasis( + basis=VectorValuedBasis([ + TensorBasis([ + MonomialBasis(domain_range=(0, 1), n_basis=2), + MonomialBasis(domain_range=(0, 1), n_basis=2), + ]), + TensorBasis([ + MonomialBasis(domain_range=(0, 1), n_basis=2), + MonomialBasis(domain_range=(0, 1), n_basis=2), + ]), + ]), + coefficients=[ + [0, 0, 0, 0, 0, 0, 0, 0], + [1, 0, 0, 0, 1, 0, 0, 0], + ], + ), + np.array([[np.sqrt(1 / 2), 0, 0, 0] * 2]), + ), +]) +def test_std_fdatabasis( + fdatabasis: FDataBasis, + expected_std_coefficients: NDArrayFloat, +) -> None: + """Test some std_fdatabasis cases.""" + np.testing.assert_allclose( + std(fdatabasis).coefficients, + expected_std_coefficients, + rtol=1e-7, + atol=1e-7, + ) From 88c7fd443d9ab2542fb988ef6845174781d85a16 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Sun, 17 Sep 2023 12:44:44 +0200 Subject: [PATCH 16/29] Do not return input in function_to_fdatabasis, but a copy. No sample names for std_fdatagrid. 
--- skfda/_utils/_utils.py | 2 +- skfda/exploratory/stats/_stats.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index fd3dcfeb7..d35617130 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -624,7 +624,7 @@ def function_to_fdatabasis( from ..misc._math import inner_product_matrix if isinstance(f, FDataBasis) and f.basis == new_basis: - return f + return f.copy() inner_prod = inner_product_matrix( new_basis, diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 4cf784ef3..c2267190d 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -129,7 +129,7 @@ def std_fdatagrid(X: FDataGrid, ddof: int = 1) -> FDataGrid: """Compute the standard deviation of a FDataGrid.""" return X.copy( data_matrix=np.std(X.data_matrix, axis=0, ddof=ddof)[np.newaxis, ...], - sample_names=("standard deviation",), + sample_names=(None,), ) From 0457e39e02119f1bfa675e422d68839aff2fda1d Mon Sep 17 00:00:00 2001 From: pcuestas Date: Sun, 17 Sep 2023 12:45:32 +0200 Subject: [PATCH 17/29] Fix test_stats_std readability. Remove confusing Gaussian processes test. 
--- skfda/tests/test_stats_std.py | 316 +++++++++++++++++----------------- 1 file changed, 159 insertions(+), 157 deletions(-) diff --git a/skfda/tests/test_stats_std.py b/skfda/tests/test_stats_std.py index a37196a31..250d78650 100644 --- a/skfda/tests/test_stats_std.py +++ b/skfda/tests/test_stats_std.py @@ -8,200 +8,202 @@ import pytest from skfda import FDataBasis, FDataGrid -from skfda.datasets import make_gaussian_process from skfda.exploratory.stats import std -from skfda.misc.covariances import Gaussian from skfda.representation.basis import ( + Basis, + BSplineBasis, FourierBasis, MonomialBasis, TensorBasis, VectorValuedBasis, ) -from skfda.typing._numpy import NDArrayFloat -@pytest.fixture(params=[61, 71]) -def n_basis(request: Any) -> int: - """Fixture for n_basis to test.""" - return request.param +# Fixtures for test_std_fdatabasis_vector_valued_basis - -@pytest.fixture -def start() -> int: - """Fixture for the infimum of the domain.""" - return 0 +@pytest.fixture(params=[3, 5]) +def vv_n_basis1(request: Any) -> int: + """n_basis for 1st coordinate of vector valued basis.""" + return request.param # type: ignore -@pytest.fixture -def stop() -> int: - """Fixture for the supremum of the domain.""" - return 1 +@pytest.fixture(params=[3]) +def vv_n_basis2(request: Any) -> int: + """n_basis for 2nd coordinate of vector valued basis.""" + return request.param # type: ignore @pytest.fixture -def n_features() -> int: - """Fixture for the number of features.""" - return 1000 +def vv_basis1(vv_n_basis1: int) -> Basis: + """1-dimensional basis to test for vector valued basis.""" + # First element of the basis is assumed to be the 1 function + return BSplineBasis( + n_basis=vv_n_basis1, + domain_range=(0, 1), + order=vv_n_basis1 - 1, + ) -@pytest.fixture -def gaussian_process(start: int, stop: int, n_features: int) -> FDataGrid: - """Fixture for a Gaussian process.""" - return make_gaussian_process( - start=start, - stop=stop, - n_samples=100, - 
n_features=n_features, - mean=0.0, - cov=Gaussian(variance=1, length_scale=0.1), - random_state=0, +@pytest.fixture(params=[FourierBasis, MonomialBasis]) +def vv_basis2(request: Any, vv_n_basis2: int) -> Basis: + """1-dimensional basis to test for vector valued basis.""" + # First element of the basis is assumed to be the 1 function + return request.param( # type: ignore + domain_range=(0, 1), n_basis=vv_n_basis2, ) -def test_std_gaussian_fourier( - start: int, - stop: int, - n_features: int, - n_basis: int, - gaussian_process: FDataGrid, -) -> None: - """Test standard deviation: Gaussian processes and a Fourier basis.""" - fourier_basis = FourierBasis(n_basis=n_basis, domain_range=(0, 1)) - fd = gaussian_process.to_basis(fourier_basis) +# Fixtures for test_std_fdatabasis_tensor_basis - std_fd = std(fd) - grid = np.linspace(start, stop, n_features) - almost_std_fd = std(fd.to_grid(grid)).to_basis(fourier_basis) +@pytest.fixture(params=[3]) +def t_n_basis1(request: Any) -> int: + """n_basis for 1st input argument of tensor basis.""" + return request.param # type: ignore - inner_grid_limit = n_features // 10 - inner_grid = grid[inner_grid_limit:-inner_grid_limit] - np.testing.assert_allclose( - std_fd(inner_grid), - almost_std_fd(inner_grid), - rtol=1e-3, + +@pytest.fixture(params=[5]) +def t_n_basis2(request: Any) -> int: + """n_basis for 2nd input argument of tensor basis.""" + return request.param # type: ignore + + +@pytest.fixture(params=[FourierBasis]) +def t_basis1(request: Any, t_n_basis2: int) -> Basis: + """1-dimensional basis to test for tensor basis.""" + # First element of the basis is assumed to be the 1 function + return request.param( # type: ignore + domain_range=(0, 1), n_basis=t_n_basis2, ) - outer_grid = grid[:inner_grid_limit] + grid[-inner_grid_limit:] + +@pytest.fixture(params=[MonomialBasis]) +def t_basis2(request: Any, t_n_basis2: int) -> Basis: + """1-dimensional basis to test for tensor basis.""" + # First element of the basis is assumed to 
be the 1 function + return request.param( # type: ignore + domain_range=(0, 1), n_basis=t_n_basis2, + ) + + +# Tests + +def test_std_fdatagrid_1d_to_2d() -> None: + """Test std_fdatagrid with R to R^2 functions.""" + fd = FDataGrid( + data_matrix=[ + [[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]], + [[2, 3, 4, 5, 6, 7], [-2, -3, -4, -5, -6, -7]], + ], + grid_points=[ + [-2, -1], + [0, 1, 2, 3, 4, 5], + ], + ) + expected_std_data_matrix = np.full((1, 2, 6, 1), np.sqrt(2)) np.testing.assert_allclose( - std_fd(outer_grid), - almost_std_fd(outer_grid), - rtol=1e-2, + std(fd).data_matrix, + expected_std_data_matrix, ) -@pytest.mark.parametrize("fdatagrid, expected_std_data_matrix", [ - ( - FDataGrid( - data_matrix=[ - [[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]], - [[2, 3, 4, 5, 6, 7], [-2, -3, -4, -5, -6, -7]], - ], - grid_points=[ - [-2, -1], - [0, 1, 2, 3, 4, 5], +def test_std_fdatagrid_2d_to_2d() -> None: + """Test std_fdatagrid with R to R^2 functions.""" + fd = FDataGrid( + data_matrix=[ + [ + [[10, 11], [10, 12], [11, 14]], + [[15, 16], [12, 15], [20, 13]], ], - ), - np.full((1, 2, 6, 1), np.sqrt(2)), - ), - ( - FDataGrid( - data_matrix=[ - [ - [[10, 11], [10, 12], [11, 14]], - [[15, 16], [12, 15], [20, 13]], - ], - [ - [[11, 12], [11, 13], [12, 13]], - [[14, 15], [11, 16], [21, 12]], - ], + [ + [[11, 12], [11, 13], [12, 13]], + [[14, 15], [11, 16], [21, 12]], ], - grid_points=[ - [0, 1], - [0, 1, 2], - ], - ), - np.full((1, 2, 3, 2), np.sqrt(1 / 2)), - ), -]) -def test_std_fdatagrid( - fdatagrid: FDataGrid, - expected_std_data_matrix: NDArrayFloat, -) -> None: - """Test some std_fdatagrid cases.""" + ], + grid_points=[ + [0, 1], + [0, 1, 2], + ], + ) + expected_std_data_matrix = np.full((1, 2, 3, 2), np.sqrt(1 / 2)) np.testing.assert_allclose( - std(fdatagrid).data_matrix, + std(fd).data_matrix, expected_std_data_matrix, ) -@pytest.mark.parametrize("fdatabasis, expected_std_coefficients", [ - ( - FDataBasis( - basis=VectorValuedBasis([ - 
MonomialBasis(domain_range=(0, 1), n_basis=3), - MonomialBasis(domain_range=(0, 1), n_basis=3), - ]), - coefficients=[ - [0, 0, 0, 0, 0, 0], - [1, 0, 0, 1, 0, 0], - ], - ), - np.array([[np.sqrt(1 / 2), 0, 0, np.sqrt(1 / 2), 0, 0]]), - ), - ( - FDataBasis( - basis=VectorValuedBasis([ - FourierBasis(domain_range=(0, 1), n_basis=5), - MonomialBasis(domain_range=(0, 1), n_basis=4), - ]), - coefficients=[ - [0, 0, 0, 0, 0, 0, 0, 0, 0], - [1, 0, 0, 0, 0, 1, 0, 0, 0], - ], - ), - np.array([[np.sqrt(1 / 2), 0, 0, 0, 0, np.sqrt(1 / 2), 0, 0, 0]]), - ), - ( - FDataBasis( - basis=TensorBasis([ - MonomialBasis(domain_range=(0, 1), n_basis=4), - MonomialBasis(domain_range=(0, 1), n_basis=4), - ]), - coefficients=[ - np.zeros(16), - np.pad([1], (0, 15)), - ], - ), - [np.pad([np.sqrt(1 / 2)], (0, 15))], - ), - ( - FDataBasis( - basis=VectorValuedBasis([ - TensorBasis([ - MonomialBasis(domain_range=(0, 1), n_basis=2), - MonomialBasis(domain_range=(0, 1), n_basis=2), - ]), - TensorBasis([ - MonomialBasis(domain_range=(0, 1), n_basis=2), - MonomialBasis(domain_range=(0, 1), n_basis=2), - ]), - ]), - coefficients=[ - [0, 0, 0, 0, 0, 0, 0, 0], - [1, 0, 0, 0, 1, 0, 0, 0], - ], - ), - np.array([[np.sqrt(1 / 2), 0, 0, 0] * 2]), - ), -]) -def test_std_fdatabasis( - fdatabasis: FDataBasis, - expected_std_coefficients: NDArrayFloat, +def test_std_fdatabasis_vector_valued_basis( + vv_basis1: Basis, + vv_basis2: Basis, +) -> None: + """Test std_fdatabasis with a vector valued basis.""" + basis = VectorValuedBasis([vv_basis1, vv_basis2]) + + # coefficients of the function===(1, 1) + one_coefficients = np.concatenate(( + np.pad([1], (0, vv_basis1.n_basis - 1)), + np.pad([1], (0, vv_basis2.n_basis - 1)), + )) + + fd = FDataBasis( + basis=basis, + coefficients=[np.zeros(basis.n_basis), one_coefficients], + ) + + np.testing.assert_allclose( + std(fd).coefficients, + np.array([np.sqrt(1 / 2) * one_coefficients]), + rtol=1e-7, + atol=1e-7, + ) + + +def test_std_fdatabasis_tensor_basis( + t_basis1: 
Basis, + t_basis2: Basis, ) -> None: - """Test some std_fdatabasis cases.""" + """Test std_fdatabasis with a vector valued basis.""" + basis = TensorBasis([t_basis1, t_basis2]) + + # coefficients of the function===1 + one_coefficients = np.pad([1], (0, basis.n_basis - 1)) + + fd = FDataBasis( + basis=basis, + coefficients=[np.zeros(basis.n_basis), one_coefficients], + ) + + np.testing.assert_allclose( + std(fd).coefficients, + np.array([np.sqrt(1 / 2) * one_coefficients]), + rtol=1e-7, + atol=1e-7, + ) + + +def test_std_fdatabasis_2d_to_2d() -> None: + """Test std_fdatabasis with R^2 to R^2 basis.""" + basis = VectorValuedBasis([ + TensorBasis([ + MonomialBasis(domain_range=(0, 1), n_basis=2), + MonomialBasis(domain_range=(0, 1), n_basis=2), + ]), + TensorBasis([ + MonomialBasis(domain_range=(0, 1), n_basis=2), + MonomialBasis(domain_range=(0, 1), n_basis=2), + ]), + ]) + fd = FDataBasis( + basis=basis, + coefficients=[ + [0, 0, 0, 0, 0, 0, 0, 0], + [1, 0, 0, 0, 1, 0, 0, 0], + ], + ) + expected_coefficients = np.array([[np.sqrt(1 / 2), 0, 0, 0] * 2]) + np.testing.assert_allclose( - std(fdatabasis).coefficients, - expected_std_coefficients, + std(fd).coefficients, + expected_coefficients, rtol=1e-7, atol=1e-7, ) From ccb8ed495e6d2a8c41461276d7eb24c2c7ee3b2a Mon Sep 17 00:00:00 2001 From: pcuestas Date: Sun, 17 Sep 2023 13:11:14 +0200 Subject: [PATCH 18/29] mypy ignore[no-any-return] on fixtures --- skfda/tests/test_stats_std.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/skfda/tests/test_stats_std.py b/skfda/tests/test_stats_std.py index 250d78650..f4c10b8a1 100644 --- a/skfda/tests/test_stats_std.py +++ b/skfda/tests/test_stats_std.py @@ -18,19 +18,18 @@ VectorValuedBasis, ) - # Fixtures for test_std_fdatabasis_vector_valued_basis @pytest.fixture(params=[3, 5]) def vv_n_basis1(request: Any) -> int: """n_basis for 1st coordinate of vector valued basis.""" - return request.param # type: ignore + return request.param # type: 
ignore[no-any-return] @pytest.fixture(params=[3]) def vv_n_basis2(request: Any) -> int: """n_basis for 2nd coordinate of vector valued basis.""" - return request.param # type: ignore + return request.param # type: ignore[no-any-return] @pytest.fixture @@ -48,7 +47,7 @@ def vv_basis1(vv_n_basis1: int) -> Basis: def vv_basis2(request: Any, vv_n_basis2: int) -> Basis: """1-dimensional basis to test for vector valued basis.""" # First element of the basis is assumed to be the 1 function - return request.param( # type: ignore + return request.param( # type: ignore[no-any-return] domain_range=(0, 1), n_basis=vv_n_basis2, ) @@ -58,20 +57,20 @@ def vv_basis2(request: Any, vv_n_basis2: int) -> Basis: @pytest.fixture(params=[3]) def t_n_basis1(request: Any) -> int: """n_basis for 1st input argument of tensor basis.""" - return request.param # type: ignore + return request.param # type: ignore[no-any-return] @pytest.fixture(params=[5]) def t_n_basis2(request: Any) -> int: """n_basis for 2nd input argument of tensor basis.""" - return request.param # type: ignore + return request.param # type: ignore[no-any-return] @pytest.fixture(params=[FourierBasis]) def t_basis1(request: Any, t_n_basis2: int) -> Basis: """1-dimensional basis to test for tensor basis.""" # First element of the basis is assumed to be the 1 function - return request.param( # type: ignore + return request.param( # type: ignore[no-any-return] domain_range=(0, 1), n_basis=t_n_basis2, ) @@ -80,7 +79,7 @@ def t_basis1(request: Any, t_n_basis2: int) -> Basis: def t_basis2(request: Any, t_n_basis2: int) -> Basis: """1-dimensional basis to test for tensor basis.""" # First element of the basis is assumed to be the 1 function - return request.param( # type: ignore + return request.param( # type: ignore[no-any-return] domain_range=(0, 1), n_basis=t_n_basis2, ) From c7f70a18a01d81635926e90590b9368604d604ba Mon Sep 17 00:00:00 2001 From: pcuestas Date: Sun, 17 Sep 2023 13:34:48 +0200 Subject: [PATCH 19/29] Fix typing 
issues in std_function inside of std_fdatabasis --- skfda/exploratory/stats/_stats.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index c2267190d..4af295b0c 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -145,9 +145,12 @@ def std_fdatabasis(X: FDataBasis, ddof: int = 1) -> FDataBasis: def std_function(t_points: NDArrayFloat) -> NDArrayFloat: basis_evaluation = basis(t_points).reshape((basis.n_basis, -1)) - return np.sqrt( - np.diag(basis_evaluation.T @ coeff_cov_matrix @ basis_evaluation), - ).reshape((1, -1, X.dim_codomain)) + return np.reshape( + np.sqrt(np.diag( + basis_evaluation.T @ coeff_cov_matrix @ basis_evaluation, + )), + (1, -1, X.dim_codomain), + ) return function_to_fdatabasis(f=std_function, new_basis=X.basis) From 0b51aa523e73e492747802bc3218706a343033c1 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Sun, 17 Sep 2023 13:48:30 +0200 Subject: [PATCH 20/29] Reorganize std_function for clarity --- skfda/exploratory/stats/_stats.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 4af295b0c..9475e82d6 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -145,12 +145,10 @@ def std_fdatabasis(X: FDataBasis, ddof: int = 1) -> FDataBasis: def std_function(t_points: NDArrayFloat) -> NDArrayFloat: basis_evaluation = basis(t_points).reshape((basis.n_basis, -1)) - return np.reshape( - np.sqrt(np.diag( - basis_evaluation.T @ coeff_cov_matrix @ basis_evaluation, - )), - (1, -1, X.dim_codomain), + std_values = np.sqrt( + np.diag(basis_evaluation.T @ coeff_cov_matrix @ basis_evaluation) ) + return np.reshape(std_values, (1, -1, X.dim_codomain)) return function_to_fdatabasis(f=std_function, new_basis=X.basis) From a0d7d7e500a49947a7bca004cc9e43b3ce96ad8d Mon Sep 17 00:00:00 2001 From: pcuestas 
Date: Sun, 17 Sep 2023 14:03:43 +0200 Subject: [PATCH 21/29] import FDatabasis ignore warning in function_to_fdatabasis --- skfda/_utils/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index d35617130..aebb5189f 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -620,7 +620,7 @@ def function_to_fdatabasis( FDataBasis: FDataBasis with calculated coefficients and the new basis. """ - from .. import FDataBasis + from .. import FDataBasis # noqa: WPS442 from ..misc._math import inner_product_matrix if isinstance(f, FDataBasis) and f.basis == new_basis: From 0c11d71f5e2f7646c267f79c74d544702fe67012 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Sun, 17 Sep 2023 14:40:02 +0200 Subject: [PATCH 22/29] Fix style --- skfda/exploratory/stats/_stats.py | 2 +- skfda/tests/test_stats_std.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 9475e82d6..0d6cd01b0 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -146,7 +146,7 @@ def std_fdatabasis(X: FDataBasis, ddof: int = 1) -> FDataBasis: def std_function(t_points: NDArrayFloat) -> NDArrayFloat: basis_evaluation = basis(t_points).reshape((basis.n_basis, -1)) std_values = np.sqrt( - np.diag(basis_evaluation.T @ coeff_cov_matrix @ basis_evaluation) + np.diag(basis_evaluation.T @ coeff_cov_matrix @ basis_evaluation), ) return np.reshape(std_values, (1, -1, X.dim_codomain)) diff --git a/skfda/tests/test_stats_std.py b/skfda/tests/test_stats_std.py index f4c10b8a1..ce6945af7 100644 --- a/skfda/tests/test_stats_std.py +++ b/skfda/tests/test_stats_std.py @@ -20,6 +20,7 @@ # Fixtures for test_std_fdatabasis_vector_valued_basis + @pytest.fixture(params=[3, 5]) def vv_n_basis1(request: Any) -> int: """n_basis for 1st coordinate of vector valued basis.""" From 533d13b571a1529dd2933d95819b8df72016b851 Mon Sep 17 
00:00:00 2001 From: pcuestas Date: Wed, 20 Sep 2023 15:58:31 +0200 Subject: [PATCH 23/29] Add std to stats.rst --- docs/modules/exploratory/stats.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/modules/exploratory/stats.rst b/docs/modules/exploratory/stats.rst index 17642a732..b55b05fe6 100644 --- a/docs/modules/exploratory/stats.rst +++ b/docs/modules/exploratory/stats.rst @@ -31,4 +31,5 @@ statistics can be used. skfda.exploratory.stats.cov skfda.exploratory.stats.var + skfda.exploratory.stats.std From f3845b5c0e9666cb46419f40f8686ade2922ece2 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Wed, 20 Sep 2023 16:01:15 +0200 Subject: [PATCH 24/29] Fix type checking for function_to_fdatabasis --- skfda/_utils/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/_utils/__init__.py b/skfda/_utils/__init__.py index 838328a6c..bb0636fdd 100644 --- a/skfda/_utils/__init__.py +++ b/skfda/_utils/__init__.py @@ -45,9 +45,9 @@ _same_domain as _same_domain, _to_grid as _to_grid, _to_grid_points as _to_grid_points, + function_to_fdatabasis as function_to_fdatabasis, nquad_vec as nquad_vec, ) - from ._warping import ( invert_warping as invert_warping, normalize_scale as normalize_scale, From 21967db52a97c021692bc073f683b1817a4b4c18 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Wed, 20 Sep 2023 16:11:03 +0200 Subject: [PATCH 25/29] Isort --- skfda/exploratory/stats/_stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 0d6cd01b0..ca8e88dc2 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -1,10 +1,10 @@ """Functional data descriptive statistics.""" from __future__ import annotations +import functools from builtins import isinstance from typing import Callable, TypeVar, Union -import functools import numpy as np from scipy import integrate from scipy.stats import rankdata From 
ce105d9dab78ec43307e075c29a496c71b96d44d Mon Sep 17 00:00:00 2001 From: pcuestas Date: Wed, 20 Sep 2023 16:25:13 +0200 Subject: [PATCH 26/29] Fix nested function warning --- skfda/exploratory/stats/_stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index ca8e88dc2..40cee7382 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -143,7 +143,7 @@ def std_fdatabasis(X: FDataBasis, ddof: int = 1) -> FDataBasis: X.coefficients, rowvar=False, ddof=ddof, ).reshape((basis.n_basis, basis.n_basis)) - def std_function(t_points: NDArrayFloat) -> NDArrayFloat: + def std_function(t_points: NDArrayFloat) -> NDArrayFloat: # noqa: WPS430 basis_evaluation = basis(t_points).reshape((basis.n_basis, -1)) std_values = np.sqrt( np.diag(basis_evaluation.T @ coeff_cov_matrix @ basis_evaluation), From 5509c0022b02c6ec9ca602d43d81ef026b44c65e Mon Sep 17 00:00:00 2001 From: pcuestas Date: Wed, 4 Oct 2023 14:56:59 +0200 Subject: [PATCH 27/29] Fix tests, remove type from std.register --- skfda/exploratory/stats/_stats.py | 4 ++-- skfda/tests/test_stats_std.py | 33 ++++++------------------------- 2 files changed, 8 insertions(+), 29 deletions(-) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 40cee7382..af0bb1da0 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -124,7 +124,7 @@ def std(X: F, ddof: int = 1) -> F: raise NotImplementedError("Not implemented for this type") -@std.register(FDataGrid) +@std.register def std_fdatagrid(X: FDataGrid, ddof: int = 1) -> FDataGrid: """Compute the standard deviation of a FDataGrid.""" return X.copy( @@ -133,7 +133,7 @@ def std_fdatagrid(X: FDataGrid, ddof: int = 1) -> FDataGrid: ) -@std.register(FDataBasis) +@std.register def std_fdatabasis(X: FDataBasis, ddof: int = 1) -> FDataBasis: """Compute the standard deviation of a FDataBasis.""" from 
..._utils import function_to_fdatabasis diff --git a/skfda/tests/test_stats_std.py b/skfda/tests/test_stats_std.py index ce6945af7..24f6687af 100644 --- a/skfda/tests/test_stats_std.py +++ b/skfda/tests/test_stats_std.py @@ -11,7 +11,6 @@ from skfda.exploratory.stats import std from skfda.representation.basis import ( Basis, - BSplineBasis, FourierBasis, MonomialBasis, TensorBasis, @@ -27,25 +26,17 @@ def vv_n_basis1(request: Any) -> int: return request.param # type: ignore[no-any-return] -@pytest.fixture(params=[3]) -def vv_n_basis2(request: Any) -> int: - """n_basis for 2nd coordinate of vector valued basis.""" - return request.param # type: ignore[no-any-return] - - @pytest.fixture def vv_basis1(vv_n_basis1: int) -> Basis: """1-dimensional basis to test for vector valued basis.""" # First element of the basis is assumed to be the 1 function - return BSplineBasis( - n_basis=vv_n_basis1, - domain_range=(0, 1), - order=vv_n_basis1 - 1, + return MonomialBasis( + n_basis=vv_n_basis1, domain_range=(0, 1), ) @pytest.fixture(params=[FourierBasis, MonomialBasis]) -def vv_basis2(request: Any, vv_n_basis2: int) -> Basis: +def vv_basis2(request: Any, vv_n_basis2: int = 3) -> Basis: """1-dimensional basis to test for vector valued basis.""" # First element of the basis is assumed to be the 1 function return request.param( # type: ignore[no-any-return] @@ -55,29 +46,17 @@ def vv_basis2(request: Any, vv_n_basis2: int) -> Basis: # Fixtures for test_std_fdatabasis_tensor_basis -@pytest.fixture(params=[3]) -def t_n_basis1(request: Any) -> int: - """n_basis for 1st input argument of tensor basis.""" - return request.param # type: ignore[no-any-return] - - -@pytest.fixture(params=[5]) -def t_n_basis2(request: Any) -> int: - """n_basis for 2nd input argument of tensor basis.""" - return request.param # type: ignore[no-any-return] - - @pytest.fixture(params=[FourierBasis]) -def t_basis1(request: Any, t_n_basis2: int) -> Basis: +def t_basis1(request: Any, t_n_basis1: int = 3) -> 
Basis: """1-dimensional basis to test for tensor basis.""" # First element of the basis is assumed to be the 1 function return request.param( # type: ignore[no-any-return] - domain_range=(0, 1), n_basis=t_n_basis2, + domain_range=(0, 1), n_basis=t_n_basis1, ) @pytest.fixture(params=[MonomialBasis]) -def t_basis2(request: Any, t_n_basis2: int) -> Basis: +def t_basis2(request: Any, t_n_basis2: int = 5) -> Basis: """1-dimensional basis to test for tensor basis.""" # First element of the basis is assumed to be the 1 function return request.param( # type: ignore[no-any-return] From 6b5e47d152bda6d5f33a0aebc5ff8962cadc26b6 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 12 Oct 2023 11:27:47 +0200 Subject: [PATCH 28/29] sum instead of diag to compute std in std_fdatagrid integrand --- skfda/exploratory/stats/_stats.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index af0bb1da0..903261590 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -146,7 +146,10 @@ def std_fdatabasis(X: FDataBasis, ddof: int = 1) -> FDataBasis: def std_function(t_points: NDArrayFloat) -> NDArrayFloat: # noqa: WPS430 basis_evaluation = basis(t_points).reshape((basis.n_basis, -1)) std_values = np.sqrt( - np.diag(basis_evaluation.T @ coeff_cov_matrix @ basis_evaluation), + np.sum( + basis_evaluation * (coeff_cov_matrix @ basis_evaluation), + axis=0, + ), ) return np.reshape(std_values, (1, -1, X.dim_codomain)) From bb5923d6138ecb1ae1dc0fe506324d88f548d3cd Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 12 Oct 2023 11:30:45 +0200 Subject: [PATCH 29/29] Rename ddof -> correction --- skfda/exploratory/stats/_stats.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 009261497..5bf81b3b9 100644 --- a/skfda/exploratory/stats/_stats.py +++ 
b/skfda/exploratory/stats/_stats.py @@ -103,20 +103,20 @@ def cov( @functools.singledispatch -def std(X: F, ddof: int = 1) -> F: +def std(X: F, correction: int = 1) -> F: r""" Compute the standard deviation of all the samples in a FData object. .. math:: - \text{std}_X(t) = \sqrt{\frac{1}{N-\text{ddof}} + \text{std}_X(t) = \sqrt{\frac{1}{N-\text{correction}} \sum_{n=1}^{N}{\left(X_n(t) - \overline{X}(t)\right)^2}} Args: X: Object containing all the samples whose standard deviation is wanted. - ddof: Means "Delta Degrees of Freedom". The divisor used in - calculations is `N - ddof`, where `N` represents the number of - samples in `X`. By default ddof is 1. + correction: degrees of freedom adjustment. The divisor used in the + calculation is `N - correction`, where `N` represents the number of + samples in `X`. Default: `1`. Returns: Standard deviation of all the samples in the original object, as a @@ -127,22 +127,24 @@ def std(X: F, ddof: int = 1) -> F: @std.register -def std_fdatagrid(X: FDataGrid, ddof: int = 1) -> FDataGrid: +def std_fdatagrid(X: FDataGrid, correction: int = 1) -> FDataGrid: """Compute the standard deviation of a FDataGrid.""" return X.copy( - data_matrix=np.std(X.data_matrix, axis=0, ddof=ddof)[np.newaxis, ...], + data_matrix=np.std( + X.data_matrix, axis=0, ddof=correction, + )[np.newaxis, ...], sample_names=(None,), ) @std.register -def std_fdatabasis(X: FDataBasis, ddof: int = 1) -> FDataBasis: +def std_fdatabasis(X: FDataBasis, correction: int = 1) -> FDataBasis: """Compute the standard deviation of a FDataBasis.""" from ..._utils import function_to_fdatabasis basis = X.basis coeff_cov_matrix = np.cov( - X.coefficients, rowvar=False, ddof=ddof, + X.coefficients, rowvar=False, ddof=correction, ).reshape((basis.n_basis, basis.n_basis)) def std_function(t_points: NDArrayFloat) -> NDArrayFloat: # noqa: WPS430