Add LogCholesky metric for SPD matrices

matthieubulte · Jan 23, 2024 · 92fc4af · 92fc4af
1 parent 7d668cc
commit 92fc4af
Show file tree

Hide file tree

Showing 7 changed files with 65 additions and 8 deletions.
diff --git a/.gitignore b/.gitignore
@@ -5,4 +5,5 @@ __pycache__/
 playground
 build/
 dist/
-*.egg-info/
+*.egg-info/
+.history/
diff --git a/LICENSE b/LICENSE
@@ -1,4 +1,4 @@
-Copyright 2023 Matthieu Bulté <mb@math.ku.dk>
+Copyright 2024 Matthieu Bulté <mb@math.ku.dk>
 
 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 

diff --git a/pyfrechet/metric_spaces/__init__.py b/pyfrechet/metric_spaces/__init__.py
@@ -6,6 +6,7 @@
 from .wasserstein_1d import Wasserstein1D
 from .network import NetworkCholesky
 from .riemannian_manifold import RiemannianManifold
+from .log_cholesky import LogCholesky
 
 from .fisher_rao_phase import has_fda
 if has_fda:

diff --git a/pyfrechet/metric_spaces/log_cholesky.py b/pyfrechet/metric_spaces/log_cholesky.py
@@ -0,0 +1,47 @@
+import numpy as np
+from .metric_space import MetricSpace
+
+class LogCholesky(MetricSpace):
+    """
+    Log-Cholesky space of dxd SPD matrices as defined in [1]. The matrices are represented
+    via their Cholesky decomposition by the tuple (D, L) where D are the diagonal elements
+    of the Cholesky factor and L is the lower triangular without the diagonal factors.
+
+    Then, for A = (D1, L1) and B = (D2, L2)
+    d(A, B)^2 = || L1 - L2 ||_F^2 + || log D1 - log D2 ||_2^2
+
+    For a rv X = (D, L), 
+    E[X] = (exp(E[log D], E[L]) 
+
+    We represent this in a vector [ log D; vec(L) ] of dim d(d+1)/2. Transforming from and 
+    to the full matrix representation can be done via the spd_to_log_chol and log_chol_to_spd
+    functions.
+
+    [1] RIEMANNIAN GEOMETRY OF SYMMETRIC POSITIVE DEFINITE MATRICES VIA CHOLESKY DECOMPOSITION
+    """
+    def __init__(self, dim):
+        self.dim = dim
+
+    def _d(self, x, y):
+        return np.linalg.norm(x - y)
+
+    def _frechet_mean(self, y, w):
+        return w.dot(y)
+
+    def __str__(self):
+        return f'LogCholesky({self.dim}x{self.dim})'
+
+
+def spd_to_log_chol(X):
+    d = X.shape[0]
+    L = np.linalg.cholesky(X)
+    return np.r_[np.log(L[np.diag_indices(d)]), L[np.tril_indices(d, -1)]]
+
+
+def log_chol_to_spd(DL):
+    n = DL.shape[0]
+    d = int((-1 + np.sqrt(1 + 8 * n)) / 2)
+    L = np.zeros(shape=(d,d))
+    L[np.diag_indices(d)] = np.exp(DL[:d])
+    L[np.tril_indices(d, -1)] = DL[d:]
+    return L.dot(L.T)
diff --git a/pyfrechet/regression/trees.py b/pyfrechet/regression/trees.py
@@ -32,8 +32,8 @@ class Node:
 def _2means_propose_splits(X_j):
     kmeans = KMeans(
         n_clusters=2,
-        random_state=0, 
-        n_init='auto'
+        n_init=1,
+        max_iter=10
     ).fit(X_j.reshape((X_j.shape[0], 1)))
     assert not kmeans.labels_ is None
     sel = kmeans.labels_.astype(bool)
@@ -49,6 +49,9 @@ def _greedy_propose_splits(X_j):
         yield X_j[i]
 
 
+
+
+
 class Tree(WeightingRegressor):
     def __init__(self, 
                  split_type='greedy',
@@ -58,7 +61,6 @@ def __init__(self,
                  is_honest=False,
                  honesty_fraction=0.5):
         super().__init__(precompute_distances=(impurity_method == 'medoid'))
-
         # TODO: parameter constraints, see https://github.com/scikit-learn/scikit-learn/blob/364c77e047ca08a95862becf40a04fe9d4cd2c98/sklearn/ensemble/_forest.py#L199
         self.split_type = split_type
         self.impurity_method = impurity_method

diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name="pyfrechet",
-    version="1.0.1",
+    version="1.0.2",
     author="Matthieu Bulté",
     author_email="mb@math.ku.dk",
     description="A module for the manipulation and analysis of data in metric spaces.",

diff --git a/tests/test_metric_data.py b/tests/test_metric_data.py
@@ -17,7 +17,12 @@ def random_corr_matrix(dim):
 
     dim = 5
     y = np.array([ random_corr_matrix(dim) for i in range(n) ])
-    return CorrFrobenius(5), y
+    return CorrFrobenius(dim), y
+
+def gen_log_cholesky(n):
+    dim = 5
+    n_entries =int(dim*(dim + 1)/2)
+    return LogCholesky(dim), np.random.rand(n*n_entries).reshape((n,n_entries))
 
 def gen_sphere(n): 
     dim = 4
@@ -47,7 +52,8 @@ def gen_wasserstein(n):
     gen_euclidean,
     gen_corr_matrices,
     gen_wasserstein,
-    gen_fr_phase
+    gen_fr_phase,
+    gen_log_cholesky
 ]
 
 @pytest.mark.parametrize("gen_data", GENERATORS)