diff --git a/README.md b/README.md index 462781e..f617082 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![Checked with mypy](https://www.mypy-lang.org/static/mypy_badge.svg)](http://mypy-lang.org/) [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit) -`multispaeti` is an implementation of +`multispaeti` is a Python implementation of [MULTISPATI-PCA](https://doi.org/10.3170/2007-8-18312) / [spatialPCA](https://doi.org/10.1038/hdy.2008.34). diff --git a/docs/source/conf.py b/docs/source/conf.py index 8a3efe1..9b4cc70 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -34,7 +34,7 @@ autodoc_typehints = "none" autodoc_typehints_format = "short" -autoclass_content = "both" +autoclass_content = "class" autodoc_member_order = "groupwise" python_use_unqualified_type_names = True # still experimental diff --git a/docs/source/index.rst b/docs/source/index.rst index e51ef74..3617296 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,7 +1,7 @@ What is ``multiSPAETI``? ======================== -``multispaeti`` is an implementation of +``multispaeti`` is a Python implementation of `MULTISPATI-PCA `_ / `spatialPCA `_. diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 048c356..3a15088 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -51,10 +51,3 @@ Alternatively, this can be achieved in one step by .. .. code-block:: python .. X_transformed = msPCA.moransI_bounds() - -.. and :py:meth:`multispaeti.MultispatiPCA.variance_moransI_decomposition` which allows -.. to decompose the extracted eigenvalues into a variance and Moran's `I` contribution. - -.. .. code-block:: python - -.. var, moranI = msPCA.variance_moransI_decomposition(X) diff --git a/multispaeti/_multispati_pca.py b/multispaeti/_multispati_pca.py index 11cd889..3f0c97b 100644 --- a/multispaeti/_multispati_pca.py +++ b/multispaeti/_multispati_pca.py @@ -31,6 +31,53 @@ class MultispatiPCA: :math:`H=1/(2n)*X^t(W+W^t)X` where `X` is matrix of `n` observations :math:`\\times` `d` features, and `W` is a matrix of the connectivity between observations. + Parameters + ---------- + n_components : int or tuple[int, int], optional + Number of components to keep. + If None, will keep all components (only supported for non-sparse `X`). + If an int, it will keep the top `n_components`. + If a tuple, it will keep the top and bottom `n_components` respectively. + connectivity : scipy.sparse.sparray or scipy.sparse.spmatrix + Matrix of row-wise neighbor definitions i.e. c\ :sub:`ij` is the connectivity of + i :math:`\\to` j. The matrix does not have to be symmetric. It can be a + binary adjacency matrix or a matrix of connectivities in which case + c\ :sub:`ij` should be larger if i and j are close. + A distance matrix should be transformed to connectivities by e.g. + calculating :math:`1-d/d_{max}` beforehand. + + Raises + ------ + ValueError + If connectivity is not a square matrix. + ZeroDivisionError + If one of the observations has no neighbors. + + Attributes + ---------- + components_ : numpy.ndarray + The estimated components: Array of shape `(n_components, n_features)`. + + variance_ : numpy.ndarray + The estimated variance part of the eigenvalues. Array of shape `(n_components,)`. + + moransI_ : numpy.ndarray + The estimated Moran's I part of the eigenvalues. Array of shape `(n_components,)`. + + eigenvalues_ : numpy.ndarray + The eigenvalues corresponding to each of the selected components. Array of shape + `(n_components,)`. + + n_components_ : int + The estimated number of components. + + n_samples_ : int + Number of samples in the training data. + + n_features_in_ : int + Number of features seen during :term:`fit`. + + References ---------- `Dray, Stéphane, Sonia Saïd, and Françis Débias. "Spatial ordination of vegetation @@ -47,29 +94,6 @@ def __init__( *, connectivity: sparray | spmatrix, ): - """ - Parameters - ---------- - - n_components : int or tuple[int, int], optional - Number of components to keep. - If None, will keep all components (only supported for non-sparse `X`). - If an int, it will keep the top `n_components`. - If a tuple, it will keep the top and bottom `n_components` respectively. - connectivity : scipy.sparse.sparray or scipy.sparse.spmatrix - Matrix of row-wise neighbor definitions i.e. c\ :sub:`ij` is the connectivity of - i :math:`\\to` j. The matrix does not have to be symmetric. It can be a - binary adjacency matrix or a matrix of connectivities in which case - c\ :sub:`ij` should be larger if i and j are close. - A distance matrix should be transformed to connectivities by e.g. - calculating :math:`1-d/d_{max}` beforehand. - Raises - ------ - ValueError - If connectivity is not a square matrix. - ZeroDivisionError - If one of the observations has no neighbors. - """ self._fitted = False W = csr_array(connectivity) if W.shape[0] != W.shape[1]: @@ -247,7 +271,10 @@ def transform(self, X: _X) -> np.ndarray: self._not_fitted() X = scale(X, with_mean=not issparse(X)) - return X @ self.components_ + X_t = X @ self.components_ + self.variance_, self.moransI_ = self._variance_moransI_decomposition(X_t) + + return X_t def fit_transform(self, X: _X) -> np.ndarray: """ @@ -291,36 +318,17 @@ def transform_spatial_lag(self, X: _X) -> np.ndarray: def _spatial_lag(self, X: np.ndarray) -> np.ndarray: return self.W @ X - def variance_moransI_decomposition(self, X: _X) -> tuple[np.ndarray, np.ndarray]: - """ - Calculate the decomposition of the variance and Moran's I for `n_components`. - - Parameters - ---------- - X : numpy.ndarray or scipy.sparse.csr_array or scipy.sparse.csc_array - Array of observations x features. - - Returns - ------- - tuple[numpy.ndarray, numpy.ndarray] - Variance and Moran's I. - - Raises - ------ - ValueError - If instance has not been fitted. - """ - if not self._fitted: - self._not_fitted() - transformed = self.transform(X) - lag = self._spatial_lag(transformed) + def _variance_moransI_decomposition( + self, X_t: np.ndarray + ) -> tuple[np.ndarray, np.ndarray]: + lag = self._spatial_lag(X_t) # vector of row_Weights from dudi.PCA - # (we only use default row_weights i.e. 1/n anyways) - w = 1 / X.shape[0] + # (we only use default row_weights i.e. 1/n) + w = 1 / X_t.shape[0] - variance = np.sum(transformed * transformed * w, axis=0) - moran = np.sum(transformed * lag * w, axis=0) / variance + variance = np.sum(X_t * X_t * w, axis=0) + moran = np.sum(X_t * lag * w, axis=0) / variance return variance, moran diff --git a/multispaeti/_plotting.py b/multispaeti/_plotting.py index 6150c96..a36c37e 100644 --- a/multispaeti/_plotting.py +++ b/multispaeti/_plotting.py @@ -13,6 +13,9 @@ def plot_eigenvalues(msPCA: MultispatiPCA, *, n_top: int | None = None) -> Figur Parameters ---------- msPCA : MultispatiPCA + An instance of MultispatiPCA that has already been used for + :py:meth:`multispaeti.MultispatiPCA.fit` so that eigenvalues have already been + calculated. n_top : int, optional Plot the `n_top` highest and `n_top` lowest eigenvalues in a zoomed in view. @@ -52,7 +55,7 @@ def plot_eigenvalues(msPCA: MultispatiPCA, *, n_top: int | None = None) -> Figur def plot_variance_moransI_decomposition( - msPCA: MultispatiPCA, X, *, sparse_approx: bool = True, **kwargs + msPCA: MultispatiPCA, *, sparse_approx: bool = True, **kwargs ) -> Figure: """ Plot the decomposition of variance and Moran's I of the MULTISPATI-PCA eigenvalues. @@ -63,8 +66,9 @@ def plot_variance_moransI_decomposition( Parameters ---------- msPCA : multispaeti.MultispatiPCA - X : numpy.ndarray or scipy.sparse.csr_array or scipy.sparse.csc_array - TODO Data to calculate the decomposition for. + An instance of MultispatiPCA that has already been used for + :py:meth:`multispaeti.MultispatiPCA.transform` so that variance and Moran's I + contributions to the eigenvalues have already been calculated. sparse_approx : bool Whether to use a sparse approximation to calculate the decomposition. @@ -73,11 +77,10 @@ def plot_variance_moransI_decomposition( matplotlib.figure.Figure """ - variance, moranI = msPCA.variance_moransI_decomposition(X) I_min, I_max, I_0 = msPCA.moransI_bounds(sparse_approx=sparse_approx) fig, ax = plt.subplots(1) - _ = ax.scatter(x=variance, y=moranI, **kwargs) + _ = ax.scatter(x=msPCA.variance_, y=msPCA.moransI_, **kwargs) plt.axhline(y=I_0, ls="--") plt.axhline(y=I_min, ls="--")