test: add covariance and correlation tests

factorpricingmodel · Dec 29, 2022 · ca47fe9 · ca47fe9
1 parent 4bc0028
commit ca47fe9
Show file tree

Hide file tree

Showing 4 changed files with 235 additions and 9 deletions.
diff --git a/src/fpm_risk_model/factor_risk_model.py b/src/fpm_risk_model/factor_risk_model.py
@@ -1,7 +1,7 @@
 from abc import ABC
 from typing import Any, Dict, Optional, Union
 
-from numpy import diag_indices_from, diagonal, ndarray, sqrt, var
+from numpy import all, diag_indices_from, diagonal, nan, ndarray, sqrt, var
 from pandas import DataFrame
 
 
@@ -127,13 +127,21 @@ def copy(self) -> object:
             residual_returns=self._residual_returns.copy(),
         )
 
-    def specific_variances(self) -> ndarray:
+    def specific_variances(self, ddof=1) -> ndarray:
         """
         Get specific variances.
+
+        The variance of the residual returns is taken along the first axis,
+        giving one value per column.
+
+        Parameters
+        ----------
+        ddof : int, default 1
+            Delta degrees of freedom, passed through to ``numpy.var`` or
+            ``DataFrame.var``.
+
+        Returns
+        -------
+        ndarray
+            For ndarray residual returns. NOTE(review): the DataFrame branch
+            returns the result of ``DataFrame.var`` (presumably a pandas
+            Series), which the ``ndarray`` annotation does not cover - confirm
+            and widen the annotation if so.
+
+        Raises
+        ------
+        TypeError
+            If the residual returns are neither a numpy ndarray nor a pandas
+            DataFrame.
         """
-        return var(self._residual_returns)
+        if isinstance(self._residual_returns, ndarray):
+            # axis=0 is explicit here; numpy.var would otherwise reduce over
+            # the flattened array.
+            return var(self._residual_returns, axis=0, ddof=ddof)
+        elif isinstance(self._residual_returns, DataFrame):
+            return self._residual_returns.var(ddof=ddof)
 
-    def covariance(self):
+        raise TypeError(
+            "Only pandas DataFrame / numpy ndarray is supported, but not "
+            f"{self._residual_returns.__class__.__name__}"
+        )
+
+    def cov(self):
         """
         Get the covariance matrix.
+
+        The specific variances are added onto the diagonal, and every
+        instrument whose covariance column is exactly zero after that step
+        has its whole row and column replaced by NaN.
+
+        Returns
+        -------
+        DataFrame or ndarray
+            The covariance matrix, in the same container type that the
+            underlying covariance computation produced.
+
+        Raises
+        ------
+        TypeError
+            If the covariance matrix is neither a pandas DataFrame nor a
+            numpy ndarray.
         """
@@ -143,21 +151,29 @@ def covariance(self):
         )
 
         if isinstance(cov, DataFrame):
-            cov.values[diag_indices_from(cov.values)] += specific_variances
+            # Work on the frame's underlying array so the DataFrame and
+            # ndarray paths share the code below.
+            # NOTE(review): this assumes ``cov.values`` is a writable view of
+            # the frame's data; with pandas Copy-on-Write it is read-only -
+            # confirm before enabling CoW.
+            cov_values = cov.values
         elif isinstance(cov, ndarray):
-            cov[diag_indices_from(cov)] += specific_variances
+            cov_values = cov
         else:
             raise TypeError(
                 "Only pandas DataFrame / numpy ndarray is supported, but not "
                 f"{cov.__class__.__name__}"
             )
 
+        # Add the specific variances onto the diagonal of the covariance matrix
+        cov_values[diag_indices_from(cov_values)] += specific_variances
+
+        # Mark instruments whose column is exactly zero (diagonal included,
+        # since the specific variances were just added) as NaN in both their
+        # row and their column. ``all`` here is ``numpy.all``, imported at the
+        # top of the module, not the builtin.
+        nan_instruments = all(cov_values == 0.0, axis=0)
+        cov_values[nan_instruments, :] = nan
+        cov_values[:, nan_instruments] = nan
+
         return cov
 
-    def correlation(self):
+    def corr(self):
         """
         Get the correlation matrix.
+
+        The covariance matrix from ``cov()`` is normalised by the
+        volatilities, i.e. the square roots of its diagonal. NaN entries in
+        the covariance matrix remain NaN in the result.
         """
-        cov = self.covariance()
+        cov = self.cov()
         vol = sqrt(diagonal(cov))
+        # Divide entry (i, j) by vol[i] * vol[j]: the first division scales
+        # the columns, the transposed one scales the rows.
         return ((cov / vol).T / vol).T
diff --git a/tests/statistical/pca/test_pca.py b/tests/statistical/pca/test_pca.py
@@ -169,3 +169,21 @@ def test_pca_pd(
             columns=pca.factor_covariances.columns,
         ),
     )
+
+
+@pytest.mark.parametrize("speedup", [False, True])
+def test_pca_same_covariances(
+    daily_returns_pd,
+    speedup,
+):
+    """
+    The model covariances should match the sample covariances of the
+    daily returns when the number of components is the same as the
+    rank of the daily returns.
+    """
+    model = PCA(n_components=3, speedup=speedup)
+    model.fit(X=daily_returns_pd)
+    sample_cov = daily_returns_pd.cov()
+    # NaN entries in the model covariances are compared as 0.0
+    model_cov = model.cov().fillna(0.0)
+    pd.testing.assert_frame_equal(sample_cov, model_cov)
diff --git a/tests/test_factor_risk_model.py b/tests/test_factor_risk_model.py
@@ -0,0 +1,192 @@
+import pytest
+
+from numpy import array, nan
+import numpy as np
+from pandas import DataFrame
+import pandas as pd
+
+from fpm_risk_model.factor_risk_model import FactorRiskModel
+
+
+@pytest.fixture(scope="module")
+def instruments():
+    """Instrument names used as column labels in the pandas fixtures."""
+    tickers = ["A", "AAL", "AAP", "AAPL"]
+    return tickers
+
+
+@pytest.fixture(scope="module")
+def dates():
+    """Business days from 2016-01-04 to 2016-01-15, used as the row index."""
+    return pd.bdate_range(start="2016-01-04", end="2016-01-15")
+
+
+@pytest.fixture(scope="module")
+def factors():
+    """Factor names used to label the factor axes in the pandas fixtures."""
+    factor_names = ["factor_1", "factor_2"]
+    return factor_names
+
+
+@pytest.fixture(scope="module")
+def daily_returns_np():
+    """Daily returns as a 10 x 4 ndarray; the third column is all zeros."""
+    rows = [
+        [-0.02678756, -0.03400254, 0.0, 0.000855],
+        [-0.00344077, -0.00953307, 0.0, -0.02505943],
+        [0.00443915, 0.01752232, 0.0, -0.01956966],
+        [-0.04247514, -0.01891826, 0.0, -0.04220453],
+        [-0.01051272, -0.00197782, 0.0, 0.00528776],
+        [-0.01684373, 0.01758743, 0.0, 0.01619198],
+        [0.00658919, 0.02239528, 0.0, 0.01451376],
+        [-0.03482585, -0.0452383, 0.0, -0.02571051],
+        [0.02034743, 0.01122229, 0.0, 0.02187115],
+        [-0.01329412, -0.04414332, 0.0, -0.02401548],
+    ]
+    return array(rows)
+
+
+@pytest.fixture(scope="module")
+def daily_returns_pd(daily_returns_np, instruments, dates):
+    """Daily returns as a DataFrame indexed by date with instrument columns."""
+    frame = DataFrame(data=daily_returns_np, index=dates, columns=instruments)
+    return frame
+
+
+@pytest.fixture(scope="module")
+def factor_exposures():
+    """Factor exposures as a 2 x 4 ndarray; the third column is all zeros."""
+    rows = [
+        [-0.15454215, -0.22795166, 0.0, -0.17179763],
+        [0.00706732, 0.08354979, 0.0, -0.11721647],
+    ]
+    return array(rows)
+
+
+@pytest.fixture(scope="module")
+def factor_returns():
+    """Factor returns as a 10 x 2 ndarray."""
+    rows = [
+        [0.06323026, -0.15644581],
+        [0.01829957, 0.09617634],
+        [-0.06074615, 0.17670851],
+        [0.1238173, 0.14189995],
+        [-0.03715707, -0.04710242],
+        [-0.08798148, -0.03209175],
+        [-0.1300186, 0.00469255],
+        [0.14264733, -0.05445549],
+        [-0.13802269, -0.07709159],
+        [0.10593153, -0.05229029],
+    ]
+    return array(rows)
+
+
+@pytest.fixture(scope="module")
+def residual_returns():
+    """Residual returns as a 10 x 4 ndarray; the third column is all zeros."""
+    rows = [
+        [-0.00422976, 0.00199051, 0.0, 0.00116378],
+        [0.01038799, -0.00488857, 0.0, -0.00285816],
+        [0.00548287, -0.00258023, 0.0, -0.00150856],
+        [-0.01266259, 0.00595899, 0.0, 0.003484],
+        [-0.00424175, 0.00199616, 0.0, 0.00116708],
+        [-0.01853336, 0.00872176, 0.0, 0.00509929],
+        [-0.00185692, 0.00087386, 0.0, 0.00051091],
+        [-0.00071556, 0.00033674, 0.0, 0.00019688],
+        [0.01124235, -0.00529063, 0.0, -0.00309323],
+        [0.01512673, -0.00711861, 0.0, -0.00416198],
+    ]
+    return array(rows)
+
+
+@pytest.fixture(scope="module")
+def factor_covariances():
+    """Factor covariances as a 2 x 2 ndarray."""
+    rows = [
+        [1.11111111e-02, -1.13074741e-18],
+        [-1.13074741e-18, 1.11111111e-02],
+    ]
+    return array(rows)
+
+
+@pytest.fixture(scope="module")
+def factor_risk_model_np(
+    factor_exposures, factor_returns, factor_covariances, residual_returns
+):
+    """Factor risk model built directly from the ndarray fixtures."""
+    components = {
+        "factor_exposures": factor_exposures,
+        "factor_returns": factor_returns,
+        "factor_covariances": factor_covariances,
+        "residual_returns": residual_returns,
+    }
+    return FactorRiskModel(**components)
+
+
+@pytest.fixture(scope="module")
+def factor_risk_model_pd(
+    factor_exposures,
+    factor_returns,
+    factor_covariances,
+    residual_returns,
+    dates,
+    instruments,
+    factors,
+):
+    """Factor risk model built from labelled DataFrames of the same data."""
+    exposures_frame = DataFrame(
+        factor_exposures, index=factors, columns=instruments
+    )
+    returns_frame = DataFrame(factor_returns, index=dates, columns=factors)
+    covariances_frame = DataFrame(
+        factor_covariances, index=factors, columns=factors
+    )
+    residuals_frame = DataFrame(
+        residual_returns, index=dates, columns=instruments
+    )
+    return FactorRiskModel(
+        factor_exposures=exposures_frame,
+        factor_returns=returns_frame,
+        factor_covariances=covariances_frame,
+        residual_returns=residuals_frame,
+    )
+
+
+@pytest.fixture(scope="module")
+def expected_covariances():
+    """Expected 4 x 4 covariances; the third row and column are NaN."""
+    rows = [
+        [0.00038113, 0.00039798, nan, 0.0002858],
+        [0.00039798, 0.00068043, nan, 0.00032631],
+        [nan, nan, nan, nan],
+        [0.0002858, 0.00032631, nan, 0.00048932],
+    ]
+    return array(rows)
+
+
+@pytest.fixture(scope="module")
+def expected_correlations():
+    """Expected 4 x 4 correlations; the third row and column are NaN."""
+    rows = [
+        [1.0, 0.78151541, nan, 0.66179042],
+        [0.78151541, 1.0, nan, 0.56551526],
+        [nan, nan, nan, nan],
+        [0.66179042, 0.56551526, nan, 1.0],
+    ]
+    return array(rows)
+
+
+def test_factor_risk_model_np_covariances(factor_risk_model_np, expected_covariances):
+    """The ndarray-backed model returns the expected covariances."""
+    np.testing.assert_allclose(
+        factor_risk_model_np.cov(),
+        expected_covariances,
+        atol=1e-7,
+    )
+
+
+def test_factor_risk_model_np_correlations(factor_risk_model_np, expected_correlations):
+    """The ndarray-backed model returns the expected correlations."""
+    np.testing.assert_allclose(
+        factor_risk_model_np.corr(),
+        expected_correlations,
+    )
+
+
+def test_factor_risk_model_pd_covariances(
+    factor_risk_model_pd, expected_covariances, instruments
+):
+    """The DataFrame-backed model returns the expected labelled covariances."""
+    actual_frame = factor_risk_model_pd.cov()
+    # Label the expected values by instrument on both axes
+    expected_frame = pd.DataFrame(
+        data=expected_covariances, index=instruments, columns=instruments
+    )
+    pd.testing.assert_frame_equal(actual_frame, expected_frame)
+
+
+def test_factor_risk_model_pd_correlations(
+    factor_risk_model_pd, expected_correlations, instruments
+):
+    """The DataFrame-backed model returns the expected labelled correlations."""
+    actual_frame = factor_risk_model_pd.corr()
+    # Label the expected values by instrument on both axes
+    expected_frame = pd.DataFrame(
+        data=expected_correlations, index=instruments, columns=instruments
+    )
+    pd.testing.assert_frame_equal(actual_frame, expected_frame)
diff --git a/tests/test_rolling_factor_risk_model.py b/tests/test_rolling_factor_risk_model.py
@@ -241,7 +241,7 @@ def expected_factor_covariances(dates, factors):
     }
 
 
-def test_rolling_pca_np(
+def test_rolling_factor_risk_model(
     daily_returns,
     expected_factor_exposures,
     expected_factor_returns,