Skip to content

Commit

Permalink
Add LogCholesky metric for SPD matrices
Browse files Browse the repository at this point in the history
  • Loading branch information
matthieubulte committed Jan 23, 2024
1 parent 7d668cc commit 92fc4af
Show file tree
Hide file tree
Showing 7 changed files with 65 additions and 8 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ __pycache__/
playground
build/
dist/
*.egg-info/
*.egg-info/
.history/
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright 2023 Matthieu Bulté <mb@math.ku.dk>
Copyright 2024 Matthieu Bulté <mb@math.ku.dk>

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

Expand Down
1 change: 1 addition & 0 deletions pyfrechet/metric_spaces/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .wasserstein_1d import Wasserstein1D
from .network import NetworkCholesky
from .riemannian_manifold import RiemannianManifold
from .log_cholesky import LogCholesky

from .fisher_rao_phase import has_fda
if has_fda:
Expand Down
47 changes: 47 additions & 0 deletions pyfrechet/metric_spaces/log_cholesky.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import numpy as np
from .metric_space import MetricSpace

class LogCholesky(MetricSpace):
"""
Log-Cholesky space of dxd SPD matrices as defined in [1]. The matrices are represented
via their Cholesky decomposition by the tuple (D, L) where D are the diagonal elements
of the Cholesky factor and L is the lower triangular without the diagonal factors.
Then, for A = (D1, L1) and B = (D2, L2)
d(A, B)^2 = || L1 - L2 ||_F^2 + || log D1 - log D2 ||_2^2
For a rv X = (D, L),
E[X] = (exp(E[log D], E[L])
We represent this in a vector [ log D; vec(L) ] of dim d(d+1)/2. Transforming from and
to the full matrix representation can be done via the spd_to_log_chol and log_chol_to_spd
functions.
[1] RIEMANNIAN GEOMETRY OF SYMMETRIC POSITIVE DEFINITE MATRICES VIA CHOLESKY DECOMPOSITION
"""
def __init__(self, dim):
self.dim = dim

def _d(self, x, y):
return np.linalg.norm(x - y)

def _frechet_mean(self, y, w):
return w.dot(y)

def __str__(self):
return f'LogCholesky({self.dim}x{self.dim})'


def spd_to_log_chol(X):
d = X.shape[0]
L = np.linalg.cholesky(X)
return np.r_[np.log(L[np.diag_indices(d)]), L[np.tril_indices(d, -1)]]


def log_chol_to_spd(DL):
n = DL.shape[0]
d = int((-1 + np.sqrt(1 + 8 * n)) / 2)
L = np.zeros(shape=(d,d))
L[np.diag_indices(d)] = np.exp(DL[:d])
L[np.tril_indices(d, -1)] = DL[d:]
return L.dot(L.T)
8 changes: 5 additions & 3 deletions pyfrechet/regression/trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ class Node:
def _2means_propose_splits(X_j):
kmeans = KMeans(
n_clusters=2,
random_state=0,
n_init='auto'
n_init=1,
max_iter=10
).fit(X_j.reshape((X_j.shape[0], 1)))
assert not kmeans.labels_ is None
sel = kmeans.labels_.astype(bool)
Expand All @@ -49,6 +49,9 @@ def _greedy_propose_splits(X_j):
yield X_j[i]





class Tree(WeightingRegressor):
def __init__(self,
split_type='greedy',
Expand All @@ -58,7 +61,6 @@ def __init__(self,
is_honest=False,
honesty_fraction=0.5):
super().__init__(precompute_distances=(impurity_method == 'medoid'))

# TODO: parameter constraints, see https://github.com/scikit-learn/scikit-learn/blob/364c77e047ca08a95862becf40a04fe9d4cd2c98/sklearn/ensemble/_forest.py#L199
self.split_type = split_type
self.impurity_method = impurity_method
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="pyfrechet",
version="1.0.1",
version="1.0.2",
author="Matthieu Bulté",
author_email="mb@math.ku.dk",
description="A module for the manipulation and analysis of data in metric spaces.",
Expand Down
10 changes: 8 additions & 2 deletions tests/test_metric_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,12 @@ def random_corr_matrix(dim):

dim = 5
y = np.array([ random_corr_matrix(dim) for i in range(n) ])
return CorrFrobenius(5), y
return CorrFrobenius(dim), y

def gen_log_cholesky(n):
dim = 5
n_entries =int(dim*(dim + 1)/2)
return LogCholesky(dim), np.random.rand(n*n_entries).reshape((n,n_entries))

def gen_sphere(n):
dim = 4
Expand Down Expand Up @@ -47,7 +52,8 @@ def gen_wasserstein(n):
gen_euclidean,
gen_corr_matrices,
gen_wasserstein,
gen_fr_phase
gen_fr_phase,
gen_log_cholesky
]

@pytest.mark.parametrize("gen_data", GENERATORS)
Expand Down

0 comments on commit 92fc4af

Please sign in to comment.