Skip to content

Commit

Permalink
feat: add statistical risk model Rolling PCA
Browse files Browse the repository at this point in the history
  • Loading branch information
gavincyi committed Dec 2, 2022
1 parent 4da36f7 commit 603344c
Show file tree
Hide file tree
Showing 7 changed files with 612 additions and 135 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
</p>

Package to build risk models for factor pricing model. For further details, please refer
to the [documentation](https://factor-pricing-model-risk models.readthedocs.io/en/latest/)
to the [documentation](https://factor-pricing-model-risk-models.readthedocs.io/en/latest/)

## Installation

Expand Down
56 changes: 56 additions & 0 deletions src/fpm_risk_model/factor_risk_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from abc import ABC

from numpy import ndarray


class FactorRiskModel(ABC):
    """
    Base class of a factor risk model.

    A model carries three components: the factor exposures, the
    factors, and the residual returns. Each of them is initialised
    to ``None`` here and exposed through a read-only property.
    """

    def __init__(self):
        # No component is available yet, so all three start out as None.
        self._factor_exposures = self._factors = self._residual_returns = None

    @property
    def factor_exposures(self) -> ndarray:
        """
        Factor exposures held by the model.

        Returns
        -------
        ndarray
            Matrix in dimension (N, n) where N is the number of
            instruments and n is the number of components in PCA.
        """
        exposures = self._factor_exposures
        return exposures

    @property
    def factors(self) -> ndarray:
        """
        Factors held by the model.

        Returns
        -------
        ndarray
            Matrix in dimension (n, T) where n is the number of
            components in PCA and T is the number of time frames.
        """
        factor_values = self._factors
        return factor_values

    @property
    def residual_returns(self) -> ndarray:
        """
        Residual returns held by the model.

        Returns
        -------
        ndarray
            Matrix in dimension (N, T) where N is the number of
            instruments and T is the number of time frames.
        """
        residuals = self._residual_returns
        return residuals
3 changes: 3 additions & 0 deletions src/fpm_risk_model/statistical/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# flake8: noqa
from .pca import PCA
from .rolling_pca import RollingPCA
136 changes: 5 additions & 131 deletions src/fpm_risk_model/statistical/pca.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from typing import Union, Optional
from typing import Optional, Union

import numpy as np
import pandas as pd

from sklearn.decomposition import PCA as sklearn_PCA

from ..factor_risk_model import FactorRiskModel


class PCA:
class PCA(FactorRiskModel):
def __init__(
self,
n_components: int,
Expand All @@ -26,52 +27,11 @@ def __init__(
Indicate whether to speed up the computation as much as possible.
Default is True.
"""
super().__init__()
self._n_components = n_components
self._demean = demean
self._speedup = speedup
self._model = sklearn_PCA(n_components=n_components)
self._factor_exposures = None
self._factors = None
self._residual_returns = None

@property
def factor_exposures(self) -> np.ndarray:
"""
Return the factor exposures.
Return
------
np.ndarray
Matrix in dimension (N, n) where N is the number of
instruments and n is the number of components in PCA.
"""
return self._factor_exposures

@property
def factors(self) -> np.ndarray:
"""
Return the factors.
Return
------
np.ndarray
Matrix in dimension (n, T) where n is the number of
components in PCA and T is the number of time frames.
"""
return self._factors

@property
def residual_returns(self) -> np.ndarray:
"""
Return the residual returns.
Return
------
np.ndarray
Matrix in dimension (N, T) where N is the number of
instruments and T is the number of time frames.
"""
return self._residual_returns

def fit(self, X: Union[np.ndarray, pd.DataFrame]) -> object:
"""
Expand Down Expand Up @@ -150,89 +110,3 @@ def fit(self, X: Union[np.ndarray, pd.DataFrame]) -> object:
self._factors = F
self._residual_returns = residual_returns
return self


class RollingPCA:
    """
    Rolling-window wrapper around the PCA risk model.

    ``fit`` slides a window of ``rolling_timeframe + 1`` consecutive
    columns over the returns and fits the wrapped model once per window.
    The outputs of every window are kept in dictionaries keyed by the
    last column of the window (the column label for a DataFrame, the
    column position for an ndarray).
    """

    def __init__(
        self,
        n_components: int,
        demean: bool,
        rolling_timeframe: int,
        fillna_zero: Optional[bool] = True,
    ):
        """
        Constructor.

        Parameters
        ----------
        n_components : int
            Number of components.
        demean : bool
            Indicate whether to demean before fitting.
        rolling_timeframe: int
            Number of rolling time frames.
        fillna_zero: bool
            Fill the nan to 0.0 always. Default is True.
        """
        self._n_components = n_components
        self._demean = demean
        self._rolling_timeframe = rolling_timeframe
        self._fillna_zero = fillna_zero
        self._model = PCA(n_components=n_components, demean=demean)
        self._factor_exposures = {}
        self._factors = {}
        self._residual_returns = {}

    def fit(
        self,
        X: Union[np.ndarray, pd.DataFrame],
        validity: Optional[Union[np.ndarray, pd.DataFrame]] = None,
    ) -> object:
        """
        Fit the returns into the risk model.

        Parameters
        ----------
        X: pandas.DataFrame or numpy.ndarray
            Instrument returns where the rows are the instruments
            and the columns are the date / time in ascending order.
            For example, if there are N instruments and T days of
            returns, the input is with the dimension of (N, T).
        validity: pandas.DataFrame or numpy.ndarray, optional
            Mask with the same dimension as ``X``. Entries of ``X``
            whose mask value is false are replaced by nan before the
            window is fitted. Default is None, i.e. no masking.

        Returns
        -------
        object
            The object itself.

        Raises
        ------
        ValueError
            If ``validity`` is given and its dimension differs from
            the dimension of ``X``.
        """
        if validity is not None and X.shape != validity.shape:
            raise ValueError(
                f"Dimension of X {X.shape} is different than "
                f"dimension of validity {validity.shape}"
            )

        self._factor_exposures = {}
        self._factors = {}
        self._residual_returns = {}

        is_frame = isinstance(X, pd.DataFrame)
        # Work on a plain boolean array so that the mask is sliced by
        # position regardless of whether a DataFrame or ndarray was given.
        mask = None if validity is None else np.asarray(validity, dtype=bool)
        window = self._rolling_timeframe + 1

        # The last valid window ends exactly at the final column; an empty
        # range results when there are fewer columns than the window size.
        for start_index in range(X.shape[1] - window + 1):
            end_index = start_index + window

            if is_frame:
                # DataFrames need positional slicing through ``iloc``.
                X_input = X.iloc[:, start_index:end_index]
                index_name = X.columns[end_index - 1]
            else:
                X_input = X[:, start_index:end_index]
                index_name = end_index - 1

            if mask is not None:
                # The mask is windowed along the columns, like the returns.
                mask_window = mask[:, start_index:end_index]
                if is_frame:
                    X_input = X_input.where(mask_window)
                else:
                    X_input = np.where(mask_window, X_input, np.nan)

            if self._fillna_zero:
                X_input = np.nan_to_num(X_input)

            # Fit the window only, not the whole sample.
            result = self._model.fit(X_input)
            self._factor_exposures[index_name] = result.factor_exposures
            self._factors[index_name] = result.factors
            self._residual_returns[index_name] = result.residual_returns

        return self
82 changes: 82 additions & 0 deletions src/fpm_risk_model/statistical/rolling_pca.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from typing import Optional, Union

import numpy as np
import pandas as pd

from ..factor_risk_model import FactorRiskModel
from .pca import PCA


class RollingPCA(FactorRiskModel):
    """
    Rolling-window PCA risk model.

    ``fit`` slides a window of ``rolling_timeframe + 1`` consecutive
    columns over the returns and fits the wrapped PCA model once per
    window. After fitting, ``factor_exposures``, ``factors`` and
    ``residual_returns`` are dictionaries holding one entry per window,
    keyed by the last column of the window (the column label for a
    DataFrame, the column position for an ndarray).
    """

    def __init__(
        self,
        n_components: int,
        rolling_timeframe: int,
        demean: Optional[bool] = True,
        speedup: Optional[bool] = True,
    ):
        """
        Constructor.

        Parameters
        ----------
        n_components : int
            Number of components.
        rolling_timeframe: int
            Number of rolling time frames.
        demean : Optional[bool]
            Indicate whether to demean before fitting. Default is True.
        speedup: Optional[bool]
            Indicate whether to speed up the computation as much as possible.
            Default is True.
        """
        super().__init__()
        self._n_components = n_components
        self._demean = demean
        self._rolling_timeframe = rolling_timeframe
        self._speedup = speedup
        # One PCA model is reused for every window.
        self._model = PCA(n_components=n_components, demean=demean, speedup=speedup)

    def fit(
        self,
        X: Union[np.ndarray, pd.DataFrame],
    ) -> object:
        """
        Fit the returns into the risk model.

        Parameters
        ----------
        X: pandas.DataFrame or numpy.ndarray
            Instrument returns where the rows are the instruments
            and the columns are the date / time in ascending order.
            For example, if there are N instruments and T days of
            returns, the input is with the dimension of (N, T).

        Returns
        -------
        object
            The object itself.

        Raises
        ------
        TypeError
            If ``X`` is neither a pandas.DataFrame nor a numpy.ndarray.
        """
        is_frame = isinstance(X, pd.DataFrame)
        if not is_frame and not isinstance(X, np.ndarray):
            # Fail early with a clear message instead of hitting an
            # unbound local variable inside the loop.
            raise TypeError(
                "X must be a pandas.DataFrame or numpy.ndarray, "
                f"got {type(X).__name__}"
            )

        self._factor_exposures = {}
        self._factors = {}
        self._residual_returns = {}

        window = self._rolling_timeframe + 1

        # The last valid window ends exactly at the final column; an empty
        # range results when there are fewer columns than the window size.
        for start_index in range(X.shape[1] - window + 1):
            end_index = start_index + window

            if is_frame:
                X_input = X.iloc[:, start_index:end_index]
                index_name = X.columns[end_index - 1]
            else:
                X_input = X[:, start_index:end_index]
                index_name = end_index - 1

            result = self._model.fit(X_input)
            self._factor_exposures[index_name] = result.factor_exposures
            self._factors[index_name] = result.factors
            self._residual_returns[index_name] = result.residual_returns

        return self
5 changes: 2 additions & 3 deletions tests/statistical/pca/test_pca.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import pytest

import numpy as np
import pandas as pd
import pytest

from fpm_risk_model.statistical.pca import PCA
from fpm_risk_model.statistical import PCA


@pytest.fixture(scope="module")
Expand Down
Loading

0 comments on commit 603344c

Please sign in to comment.