
Adds scaling experiments
phschiele committed Dec 7, 2023
1 parent 236766f commit 96db757
Showing 5 changed files with 489 additions and 31 deletions.
38 changes: 24 additions & 14 deletions experiments/backtest.py
@@ -24,9 +24,8 @@ def data_folder():
 def load_data() -> tuple[pd.DataFrame, pd.DataFrame, pd.Series]:
     prices = pd.read_csv(data_folder() / "prices.csv", index_col=0, parse_dates=True)
     spread = pd.read_csv(data_folder() / "spreads.csv", index_col=0, parse_dates=True)
-    volume = pd.read_csv(data_folder() / "volumes.csv", index_col=0, parse_dates=True)
     rf = pd.read_csv(data_folder() / "rf.csv", index_col=0, parse_dates=True).iloc[:, 0]
-    return prices, spread, volume, rf
+    return prices, spread, rf


@dataclass
@@ -37,9 +36,9 @@ class OptimizationInput:
 
     prices: pd.DataFrame
     mean: pd.Series
-    covariance: pd.DataFrame
+    chol: np.ndarray
+    volas: np.ndarray
     spread: pd.DataFrame
-    volume: pd.DataFrame
     quantities: np.ndarray
     cash: float
     risk_target: float
@@ -59,7 +58,7 @@ def run_backtest(
     weights and then execute the trades at time t.
     """
 
-    prices, spread, volume, rf = load_data()
+    prices, spread, rf = load_data()
     n_assets = prices.shape[1]
 
     lookback = 500
@@ -81,12 +80,15 @@
         .dropna()
     )  # At time t includes data up to t+1
     covariance_df = returns.ewm(halflife=125).cov()  # At time t includes data up to t
-    days = returns.index
+    indices = range(lookback, len(prices) - forward_smoothing)
+    days = [prices.index[t] for t in indices]
     covariances = {}
+    cholesky_factorizations = {}
     for day in days:
         covariances[day] = covariance_df.loc[day]
+        cholesky_factorizations[day] = np.linalg.cholesky(covariances[day].values)
 
-    for t in range(lookback, len(prices) - forward_smoothing):
+    for t in indices:
         start_time = time.perf_counter()
 
         day = prices.index[t]
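Precomputing the per-day Cholesky factors moves the matrix factorization out of the solve loop. Passing `chol` and `volas` instead of the full covariance also suggests the optimizer models risk as a second-order-cone norm rather than a quadratic form; a minimal sketch of that identity (hypothetical 2-asset covariance, not code from this commit):

import cvxpy as cp
import numpy as np

Sigma = np.array([[0.04, 0.01], [0.01, 0.09]])  # hypothetical covariance
chol = np.linalg.cholesky(Sigma)                # Sigma == chol @ chol.T

w = cp.Variable(2)
risk = cp.norm2(chol.T @ w)  # equals sqrt(w @ Sigma @ w); SOCP-representable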
@@ -96,17 +98,18 @@
 
         prices_t = prices.iloc[t - lookback : t + 1]  # Up to t
         spread_t = spread.iloc[t - lookback : t + 1]
-        volume_t = volume.iloc[t - lookback : t + 1]
 
         mean_t = means.loc[day]  # Forecast for return t to t+1
         covariance_t = covariances[day]  # Forecast for covariance t to t+1
+        chol_t = cholesky_factorizations[day]
+        volas_t = np.sqrt(np.diag(covariance_t.values))
 
         inputs_t = OptimizationInput(
             prices_t,
             mean_t,
-            covariance_t,
+            chol_t,
+            volas_t,
             spread_t,
-            volume_t,
             quantities,
             cash,
             risk_target,
@@ -185,7 +188,10 @@ def interest_and_fees(
     cash_interest = cash * (1 + rf) ** days_t_to_t_minus_1 - cash
     short_valuations = np.clip(quantities, None, 0) * prices
     short_value = short_valuations.sum()
-    shorting_fee = short_value * (1 + rf) ** days_t_to_t_minus_1 - short_value
+    short_spread = 0.05 / 360
+    shorting_fee = (
+        short_value * (1 + rf + short_spread) ** days_t_to_t_minus_1 - short_value
+    )
     return cash_interest + shorting_fee
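For intuition on the change above: the short book now accrues the risk-free rate plus a 5% annualized borrow spread on a 360-day convention. A standalone example with hypothetical values:

rf = 0.0002                # hypothetical per-day risk-free rate
short_spread = 0.05 / 360  # 5% annualized borrow spread, 360-day convention
short_value = -200_000.0   # short positions are valued negatively
days_t_to_t_minus_1 = 1

shorting_fee = short_value * (1 + rf + short_spread) ** days_t_to_t_minus_1 - short_value
# shorting_fee < 0 here, so it reduces the net of cash interest plus fees.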


@@ -249,12 +255,16 @@ def asset_weights(self):
         return self.valuations.div(self.portfolio_value, axis=0)
 
     @property
-    def turnover(self) -> float:
+    def daily_turnover(self) -> pd.Series:
         trades = self.quantities.diff()
         prices = load_data()[0].loc[self.history]
         valuation_trades = trades * prices
         relative_trades = valuation_trades.div(self.portfolio_value, axis=0)
-        return relative_trades.abs().sum(axis=1).mean() * self.periods_per_year
+        return relative_trades.abs().sum(axis=1)
+
+    @property
+    def turnover(self) -> float:
+        return self.daily_turnover.mean() * self.periods_per_year
 
     @property
     def mean_return(self) -> float:
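The refactor above splits the daily turnover series from its annualization, so `daily_turnover` can be inspected or plotted directly while `turnover` keeps its previous meaning. A worked example with made-up numbers:

import pandas as pd

daily_turnover = pd.Series([0.01, 0.00, 0.02, 0.01])  # fractions of portfolio traded per day
periods_per_year = 252
annualized_turnover = daily_turnover.mean() * periods_per_year  # 0.01 * 252 = 2.52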
@@ -274,7 +284,7 @@ def max_leverage(self) -> float:
 
     @property
     def sharpe(self) -> float:
-        risk_free = load_data()[3].loc[self.history]
+        risk_free = load_data()[2].loc[self.history]
         excess_return = self.portfolio_returns - risk_free
         return (
             excess_return.mean() / excess_return.std() * np.sqrt(self.periods_per_year)
160 changes: 160 additions & 0 deletions experiments/scaling_large.py
@@ -0,0 +1,160 @@
import cvxpy as cp
import numpy as np
import pandas as pd
from utils import generate_random_inputs


def main():
    fitting = True
    scenarios = get_scenarios(fitting=fitting)
    res = []
    for n_assets, n_factors in scenarios:
        print(f"Running scenario with {n_assets} assets and {n_factors} factors")
        solvers = [cp.CLARABEL] if fitting else [cp.MOSEK, cp.CLARABEL]
        for solver in solvers:
            for _ in range(1):  # single run per scenario; raise to average over repeats
                problem = run_scaling(n_assets, n_factors, solver)
                assert problem.status in {
                    cp.OPTIMAL,
                    cp.OPTIMAL_INACCURATE,
                }, problem.status

                res.append(
                    {
                        "n_assets": n_assets,
                        "n_factors": n_factors,
                        "solve_time": problem.solver_stats.solve_time,
                        "solver": solver,
                    }
                )

    df = pd.DataFrame(res)

    df = df.groupby(["n_assets", "n_factors", "solver"]).mean().reset_index()

    if fitting:
        # Estimate the scaling exponents as solve_time ≈ a * n_assets^b * n_factors^c
        n_assets = df["n_assets"].values
        n_factors = df["n_factors"].values
        log_solve_time = np.log(df["solve_time"].values)

        a = cp.Variable()
        b = cp.Variable()
        c = cp.Variable()

        objective = cp.Minimize(
            cp.sum_squares(
                a + b * np.log(n_assets) + c * np.log(n_factors) - log_solve_time
            )
        )
        problem = cp.Problem(objective)
        problem.solve()
        assert problem.status in {cp.OPTIMAL, cp.OPTIMAL_INACCURATE}, problem.status

        print(
            f"Estimated scaling exponents: a={np.exp(a.value):.2f}, "
            f"b={b.value:.2f}, c={c.value:.2f}"
        )

    else:
        df.set_index(["n_assets", "n_factors"], inplace=True)
        df = df.pivot(columns="solver", values="solve_time")
        df = df.map(lambda x: f"{x:.2f}")
        df = df.loc[:, [cp.MOSEK, cp.CLARABEL]]

        # Reset column and row indices
        df.reset_index(inplace=True)
        df.columns.name = None
        df.index.name = None

        print(df.to_latex(index=False))


def run_scaling(n_assets: int, n_factors: int, solver: str) -> cp.Problem:
    mean, F, covariance = generate_random_inputs(n_assets, n_factors)
    factor_chol = np.linalg.cholesky(covariance)  # covariance == factor_chol @ factor_chol.T

    equal_weights = np.ones(n_assets) / n_assets
    # Risk of the equal-weight portfolio, kept for reference.
    equal_weight_risk = np.sqrt(equal_weights @ F @ covariance @ F.T @ equal_weights)
    sigma_target = 0

    # The risk constraint is soft.
    # For each percentage point of risk, we need to compensate with
    # 5 percentage points of return.

    rho_mean = np.percentile(np.abs(mean), 20, axis=0) * np.ones(n_assets)
    rho_covariance = 0.02
    L_max = 1.6
    T_max = 50 / 252

    risk_free = 0.0001
    w_lower = np.ones(n_assets) * (-0.05)
    w_upper = np.ones(n_assets) * 0.1
    c_lower = -0.05
    c_upper = 1.0
    gamma_risk = 5.0

    w_prev = np.ones(n_assets) / n_assets
    c_prev = 0.0

    w, c = cp.Variable(n_assets), cp.Variable()

    z = w - w_prev
    T = cp.norm1(z)  # turnover
    L = cp.norm1(w)  # leverage

    # worst-case (robust) return
    mean_return = w @ mean + risk_free * c
    return_uncertainty = rho_mean @ cp.abs(w)
    return_wc = mean_return - return_uncertainty

    # worst-case (robust) risk
    risk = cp.norm2((F @ factor_chol).T @ w)
    asset_volas = cp.norm2(F @ factor_chol, axis=1)

    risk_uncertainty = rho_covariance**0.5 * asset_volas @ cp.abs(w)
    risk_wc = cp.norm2(cp.hstack([risk, risk_uncertainty]))

    objective = return_wc - gamma_risk * cp.pos(risk_wc - sigma_target)
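    # Note: risk_wc == sqrt(risk**2 + risk_uncertainty**2), so the nominal factor
    # risk and the covariance-uncertainty term enter through one second-order cone;
    # cp.pos(risk_wc - sigma_target) makes the risk limit soft, penalizing only
    # risk above the target at gamma_risk return points per point of excess risk.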

    constraints = [
        cp.sum(w) + c == 1,
        c == c_prev - cp.sum(z),
        c_lower <= c,
        c <= c_upper,
        w_lower <= w,
        w <= w_upper,
        L <= L_max,
        T <= T_max,
    ]

    problem = cp.Problem(cp.Maximize(objective), constraints)
    problem.solve(solver=solver, verbose=False)

    assert problem.status in {cp.OPTIMAL, cp.OPTIMAL_INACCURATE}, problem.status
    return problem


def get_scenarios(fitting=False):
    if not fitting:
        return [
            (100, 10),
            (500, 30),
            (500, 50),
            (2000, 50),
            (2000, 100),
            (10000, 50),
            (10000, 100),
        ]
    else:
        # fine grid for fitting
        assets = np.logspace(2, 3, 10, dtype=int)
        pairs = [(a, int(a * f)) for a in assets for f in np.logspace(-2, -1, 10)]
        return pairs


if __name__ == "__main__":
    main()
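The fitting branch in `main` is an unconstrained least-squares problem, so the exponents could equally be recovered without CVXPY by ordinary least squares on the log-log model. A sketch with made-up timings (all values hypothetical):

import numpy as np

n_assets = np.array([100, 100, 400, 400])        # hypothetical scenario grid
n_factors = np.array([10, 40, 10, 40])
solve_time = np.array([0.05, 0.09, 0.60, 1.10])  # made-up measurements

# log t = log a + b * log(n_assets) + c * log(n_factors)
X = np.column_stack([np.ones(len(n_assets)), np.log(n_assets), np.log(n_factors)])
coef, *_ = np.linalg.lstsq(X, np.log(solve_time), rcond=None)
a, b, c = np.exp(coef[0]), coef[1], coef[2]  # solve_time ~ a * n_assets**b * n_factors**c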