diff --git a/docs/modules/exploratory/stats.rst b/docs/modules/exploratory/stats.rst index 17642a732..b55b05fe6 100644 --- a/docs/modules/exploratory/stats.rst +++ b/docs/modules/exploratory/stats.rst @@ -31,4 +31,5 @@ statistics can be used. skfda.exploratory.stats.cov skfda.exploratory.stats.var + skfda.exploratory.stats.std diff --git a/skfda/_utils/__init__.py b/skfda/_utils/__init__.py index 387f22b58..bb0636fdd 100644 --- a/skfda/_utils/__init__.py +++ b/skfda/_utils/__init__.py @@ -20,6 +20,7 @@ "_same_domain", "_to_grid", "_to_grid_points", + "function_to_fdatabasis", "nquad_vec", ], '_warping': [ @@ -44,9 +45,9 @@ _same_domain as _same_domain, _to_grid as _to_grid, _to_grid_points as _to_grid_points, + function_to_fdatabasis as function_to_fdatabasis, nquad_vec as nquad_vec, ) - from ._warping import ( invert_warping as invert_warping, normalize_scale as normalize_scale, diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index 9a6382b57..aebb5189f 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -4,7 +4,6 @@ import functools import numbers -from functools import singledispatch from typing import ( TYPE_CHECKING, Any, @@ -36,7 +35,7 @@ ArrayDTypeT = TypeVar("ArrayDTypeT", bound="np.generic") if TYPE_CHECKING: - from ..representation import FData, FDataGrid + from ..representation import FData, FDataBasis, FDataGrid from ..representation.basis import Basis from ..representation.extrapolation import ExtrapolationLike @@ -605,3 +604,36 @@ def _classifier_get_classes( f'one; got {classes.size} class', ) return classes, y_ind + + +def function_to_fdatabasis( + f: Callable[[NDArrayFloat], NDArrayFloat], + new_basis: Basis, +) -> FDataBasis: + """Express a math function as a FDataBasis with a given basis. + + Args: + f: math function. + new_basis: the basis of the output. + + Returns: + FDataBasis: FDataBasis with calculated coefficients and the new + basis. + """ + from .. import FDataBasis # noqa: WPS442 + from ..misc._math import inner_product_matrix + + if isinstance(f, FDataBasis) and f.basis == new_basis: + return f.copy() + + inner_prod = inner_product_matrix( + new_basis, + f, + _domain_range=new_basis.domain_range, + ) + + gram_matrix = new_basis.gram_matrix() + + coefs = np.linalg.solve(gram_matrix, inner_prod) + + return FDataBasis(new_basis, coefs.T) diff --git a/skfda/exploratory/stats/__init__.py b/skfda/exploratory/stats/__init__.py index 0a1f3a6de..345eb30ac 100644 --- a/skfda/exploratory/stats/__init__.py +++ b/skfda/exploratory/stats/__init__.py @@ -19,6 +19,7 @@ "gmean", "mean", "modified_epigraph_index", + "std", "trim_mean", "var", ], @@ -37,6 +38,7 @@ gmean as gmean, mean as mean, modified_epigraph_index as modified_epigraph_index, + std as std, trim_mean as trim_mean, var as var, ) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 8d5b2478c..5bf81b3b9 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -1,6 +1,7 @@ """Functional data descriptive statistics.""" from __future__ import annotations +import functools from builtins import isinstance from typing import Callable, TypeVar, Union @@ -11,7 +12,7 @@ from skfda._utils.ndfunction import average_function_value from ...misc.metrics._lp_distances import l2_distance -from ...representation import FData, FDataGrid +from ...representation import FData, FDataBasis, FDataGrid from ...typing._metric import Metric from ...typing._numpy import NDArrayFloat from ..depth import Depth, ModifiedBandDepth @@ -101,6 +102,64 @@ def cov( return X.cov(correction=correction) +@functools.singledispatch +def std(X: F, correction: int = 1) -> F: + r""" + Compute the standard deviation of all the samples in a FData object. + + .. math:: + \text{std}_X(t) = \sqrt{\frac{1}{N-\text{correction}} + \sum_{n=1}^{N}{\left(X_n(t) - \overline{X}(t)\right)^2}} + + Args: + X: Object containing all the samples whose standard deviation is + wanted. + correction: degrees of freedom adjustment. The divisor used in the + calculation is `N - correction`, where `N` represents the number of + elements. Default: `0`. + + Returns: + Standard deviation of all the samples in the original object, as a + :term:`functional data object` with just one sample. + + """ + raise NotImplementedError("Not implemented for this type") + + +@std.register +def std_fdatagrid(X: FDataGrid, correction: int = 1) -> FDataGrid: + """Compute the standard deviation of a FDataGrid.""" + return X.copy( + data_matrix=np.std( + X.data_matrix, axis=0, ddof=correction, + )[np.newaxis, ...], + sample_names=(None,), + ) + + +@std.register +def std_fdatabasis(X: FDataBasis, correction: int = 1) -> FDataBasis: + """Compute the standard deviation of a FDataBasis.""" + from ..._utils import function_to_fdatabasis + + basis = X.basis + coeff_cov_matrix = np.cov( + X.coefficients, rowvar=False, ddof=correction, + ).reshape((basis.n_basis, basis.n_basis)) + + def std_function(t_points: NDArrayFloat) -> NDArrayFloat: # noqa: WPS430 + basis_evaluation = basis(t_points).reshape((basis.n_basis, -1)) + std_values = np.sqrt( + np.sum( + basis_evaluation * (coeff_cov_matrix @ basis_evaluation), + axis=0, + ), + ) + return np.reshape(std_values, (1, -1, X.dim_codomain)) + + return function_to_fdatabasis(f=std_function, new_basis=X.basis) + + def modified_epigraph_index(X: FDataGrid) -> NDArrayFloat: """ Calculate the Modified Epigraph Index of a FDataGrid. diff --git a/skfda/tests/test_stats_std.py b/skfda/tests/test_stats_std.py new file mode 100644 index 000000000..24f6687af --- /dev/null +++ b/skfda/tests/test_stats_std.py @@ -0,0 +1,188 @@ +"""Test stats functions.""" + +from __future__ import annotations + +from typing import Any + +import numpy as np +import pytest + +from skfda import FDataBasis, FDataGrid +from skfda.exploratory.stats import std +from skfda.representation.basis import ( + Basis, + FourierBasis, + MonomialBasis, + TensorBasis, + VectorValuedBasis, +) + +# Fixtures for test_std_fdatabasis_vector_valued_basis + + +@pytest.fixture(params=[3, 5]) +def vv_n_basis1(request: Any) -> int: + """n_basis for 1st coordinate of vector valued basis.""" + return request.param # type: ignore[no-any-return] + + +@pytest.fixture +def vv_basis1(vv_n_basis1: int) -> Basis: + """1-dimensional basis to test for vector valued basis.""" + # First element of the basis is assumed to be the 1 function + return MonomialBasis( + n_basis=vv_n_basis1, domain_range=(0, 1), + ) + + +@pytest.fixture(params=[FourierBasis, MonomialBasis]) +def vv_basis2(request: Any, vv_n_basis2: int = 3) -> Basis: + """1-dimensional basis to test for vector valued basis.""" + # First element of the basis is assumed to be the 1 function + return request.param( # type: ignore[no-any-return] + domain_range=(0, 1), n_basis=vv_n_basis2, + ) + + +# Fixtures for test_std_fdatabasis_tensor_basis + +@pytest.fixture(params=[FourierBasis]) +def t_basis1(request: Any, t_n_basis1: int = 3) -> Basis: + """1-dimensional basis to test for tensor basis.""" + # First element of the basis is assumed to be the 1 function + return request.param( # type: ignore[no-any-return] + domain_range=(0, 1), n_basis=t_n_basis1, + ) + + +@pytest.fixture(params=[MonomialBasis]) +def t_basis2(request: Any, t_n_basis2: int = 5) -> Basis: + """1-dimensional basis to test for tensor basis.""" + # First element of the basis is assumed to be the 1 function + return request.param( # type: ignore[no-any-return] + domain_range=(0, 1), n_basis=t_n_basis2, + ) + + +# Tests + +def test_std_fdatagrid_1d_to_2d() -> None: + """Test std_fdatagrid with R to R^2 functions.""" + fd = FDataGrid( + data_matrix=[ + [[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]], + [[2, 3, 4, 5, 6, 7], [-2, -3, -4, -5, -6, -7]], + ], + grid_points=[ + [-2, -1], + [0, 1, 2, 3, 4, 5], + ], + ) + expected_std_data_matrix = np.full((1, 2, 6, 1), np.sqrt(2)) + np.testing.assert_allclose( + std(fd).data_matrix, + expected_std_data_matrix, + ) + + +def test_std_fdatagrid_2d_to_2d() -> None: + """Test std_fdatagrid with R to R^2 functions.""" + fd = FDataGrid( + data_matrix=[ + [ + [[10, 11], [10, 12], [11, 14]], + [[15, 16], [12, 15], [20, 13]], + ], + [ + [[11, 12], [11, 13], [12, 13]], + [[14, 15], [11, 16], [21, 12]], + ], + ], + grid_points=[ + [0, 1], + [0, 1, 2], + ], + ) + expected_std_data_matrix = np.full((1, 2, 3, 2), np.sqrt(1 / 2)) + np.testing.assert_allclose( + std(fd).data_matrix, + expected_std_data_matrix, + ) + + +def test_std_fdatabasis_vector_valued_basis( + vv_basis1: Basis, + vv_basis2: Basis, +) -> None: + """Test std_fdatabasis with a vector valued basis.""" + basis = VectorValuedBasis([vv_basis1, vv_basis2]) + + # coefficients of the function===(1, 1) + one_coefficients = np.concatenate(( + np.pad([1], (0, vv_basis1.n_basis - 1)), + np.pad([1], (0, vv_basis2.n_basis - 1)), + )) + + fd = FDataBasis( + basis=basis, + coefficients=[np.zeros(basis.n_basis), one_coefficients], + ) + + np.testing.assert_allclose( + std(fd).coefficients, + np.array([np.sqrt(1 / 2) * one_coefficients]), + rtol=1e-7, + atol=1e-7, + ) + + +def test_std_fdatabasis_tensor_basis( + t_basis1: Basis, + t_basis2: Basis, +) -> None: + """Test std_fdatabasis with a vector valued basis.""" + basis = TensorBasis([t_basis1, t_basis2]) + + # coefficients of the function===1 + one_coefficients = np.pad([1], (0, basis.n_basis - 1)) + + fd = FDataBasis( + basis=basis, + coefficients=[np.zeros(basis.n_basis), one_coefficients], + ) + + np.testing.assert_allclose( + std(fd).coefficients, + np.array([np.sqrt(1 / 2) * one_coefficients]), + rtol=1e-7, + atol=1e-7, + ) + + +def test_std_fdatabasis_2d_to_2d() -> None: + """Test std_fdatabasis with R^2 to R^2 basis.""" + basis = VectorValuedBasis([ + TensorBasis([ + MonomialBasis(domain_range=(0, 1), n_basis=2), + MonomialBasis(domain_range=(0, 1), n_basis=2), + ]), + TensorBasis([ + MonomialBasis(domain_range=(0, 1), n_basis=2), + MonomialBasis(domain_range=(0, 1), n_basis=2), + ]), + ]) + fd = FDataBasis( + basis=basis, + coefficients=[ + [0, 0, 0, 0, 0, 0, 0, 0], + [1, 0, 0, 0, 1, 0, 0, 0], + ], + ) + expected_coefficients = np.array([[np.sqrt(1 / 2), 0, 0, 0] * 2]) + + np.testing.assert_allclose( + std(fd).coefficients, + expected_coefficients, + rtol=1e-7, + atol=1e-7, + )