Skip to content

Commit

Permalink
106 how to calculate the cov mat for recent days but not all days cov…
Browse files Browse the repository at this point in the history
… mats and select recent ones (#179)

* iewma to handle nans

* funcionality to only solve at some time steps

* minor change

* edits to handling nan

* changed code file in readme file

* updated nan implementation
  • Loading branch information
kasperjo authored Mar 25, 2024
1 parent 7e9011f commit 2f0ed1f
Show file tree
Hide file tree
Showing 9 changed files with 1,820 additions and 504 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ import pandas as pd
from cvx.covariance.combination import from_ewmas

# Load return data
returns = pd.read_csv("data/ff5.csv", index_col=0, header=0, parse_dates=True).iloc[:1000]
returns = pd.read_csv("data/ff5_no_rf.csv", index_col=0, header=0, parse_dates=True).iloc[:1000]
n = returns.shape[1]

# Define half-life pairs for K=3 experts, (halflife_vola, halflife_cov)
Expand Down Expand Up @@ -107,7 +107,7 @@ import pandas as pd
from cvx.covariance.combination import from_sigmas

# Load return data
returns = pd.read_csv("data/ff5.csv", index_col=0,
returns = pd.read_csv("data/ff5_no_rf.csv", index_col=0,
header=0, parse_dates=True).iloc[:1000]
n = returns.shape[1]

Expand Down
36 changes: 27 additions & 9 deletions cvx/covariance/combination.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,15 +218,16 @@ def assets(self):
"""
return self.returns.columns

def solve(self, window=None, **kwargs):
def solve(self, window=None, times=None, **kwargs):
"""
The size of the window is crucial to specify the size of the parameters
for the cvxpy problem. Hence those computations are not in the __init__ method
Solves the covariance combination problem at a given time, i.e.,
finds the prediction for the covariance matrix at 'time+1'
Solves the covariance combination problem at a time steps given in times
param window: number of previous time steps to use in the covariance combination
param window: number of previous time steps to use in the covariance
combination problem
param times: list of time steps to solve the problem at; if None, solve at all available time steps
"""
# If window is None, use all available data; cap window at length of data
window = window or len(self.__Ls_shifted)
Expand All @@ -240,9 +241,29 @@ def solve(self, window=None, **kwargs):
}
)

Bs = {time: np.column_stack(Lts_at_r.loc[time]) for time in Lts_at_r.index}
# time steps to solve the problem at
all_available_times = Lts_at_r.index

if times is None:
times = Lts_at_r.index
else:
times = list(times)

# assert times are consecutive in all_available_times
zero_index = all_available_times.get_loc(times[0])
assert list(
all_available_times[zero_index : zero_index + len(times)]
) == list(times)
"times must be consecutive"

# add window - 1 previous time steps to times
times = (
list(all_available_times[zero_index - window + 1 : zero_index]) + times
)

Bs = {time: np.column_stack(Lts_at_r.loc[time]) for time in times}
prod_Bs = pd.Series({time: B.T @ B for time, B in Bs.items()})
times = prod_Bs.index
# times = prod_Bs.index
P = {
times[i]: sum(prod_Bs.loc[times[i - window + 1] : times[i]])
for i in range(window - 1, len(times))
Expand Down Expand Up @@ -282,9 +303,6 @@ def _solve(self, time, problem, **kwargs):
"""
problem.solve(**kwargs)

# if problem.status != "optimal":
# raise cvx.SolverError

weights = problem.weights

# Get non-shifted L
Expand Down
90 changes: 59 additions & 31 deletions cvx/covariance/ewma.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,11 @@ def center(returns, halflife, min_periods=0, mean_adj=False):
"""
if mean_adj:
mean = _generator2frame(
_ewma_mean(data=returns, halflife=halflife, min_periods=min_periods)
_ewma_mean(
data=returns,
halflife=halflife,
min_periods=min_periods,
)
)
return (returns - mean).dropna(how="all", axis=0), mean
else:
Expand Down Expand Up @@ -94,7 +98,25 @@ def iterated_ewma(
mu_halflife1=None,
mu_halflife2=None,
clip_at=None,
nan_to_num=True,
):
"""
param returns: pandas dataframe with returns for each asset
param vola_halflife: half life for volatility
param cov_halflife: half life for covariance
param min_preiods_vola: minimum number of periods to use for volatility
ewma estimate
param min_periods_cov: minimum number of periods to use for covariance
ewma estimate
param mean: whether to estimate mean; if False, assumes zero mean data
param mu_halflife1: half life for the first mean adjustment; if None, it is
set to vola_halflife; only applies if mean=True
param mu_halflife2: half life for the second mean adjustment; if None, it is
set to cov_halflife; only applies if mean=True
param clip_at: winsorizes ewma update at +-clip_at*(current ewma) in ewma;
if None, no winsorization is performed
nan_to_num: if True, replace NaNs in returns with 0.0
"""
mu_halflife1 = mu_halflife1 or vola_halflife
mu_halflife2 = mu_halflife2 or cov_halflife

Expand All @@ -104,7 +126,13 @@ def scale_cov(vola, matrix):
matrix = matrix.values

# Convert (covariance) matrix to correlation matrix
v = 1 / np.sqrt(np.diagonal(matrix)).reshape(-1, 1)
# v = 1 / np.sqrt(np.diagonal(matrix)).reshape(-1, 1)
diag = np.diagonal(matrix).copy()
one_inds = np.where(diag == 0)[0]
if one_inds.size > 0:
diag[one_inds] = 1 # temporarily, to avoid division by zero
v = 1 / np.sqrt(diag).reshape(-1, 1)
v[one_inds] = np.nan
matrix = v * matrix * v.T

cov = vola.reshape(-1, 1) * matrix * vola.reshape(1, -1)
Expand All @@ -114,14 +142,18 @@ def scale_cov(vola, matrix):
def scale_mean(vola, vec1, vec2):
return vec1 + vola * vec2

if nan_to_num:
if returns.isna().any().any():
returns = returns.fillna(0.0)

# compute the moving mean of the returns

# TODO: Check if this is correct half life
returns, returns_mean = center(
returns=returns, halflife=mu_halflife1, min_periods=0, mean_adj=mean
)

# estimate the volatility, clip some returns before they enter the estimation
# estimate the volatility, clip some returns before they enter the
# estimation
vola = volatility(
returns=returns,
halflife=vola_halflife,
Expand All @@ -130,27 +162,26 @@ def scale_mean(vola, vec1, vec2):
)

# adj the returns
adj = clip((returns / vola), clip_at=clip_at)

# center the adj returns again? Yes, I think so
# TODO: Check if this is correct half life
# make sure adj is NaN where vola is zero or returns are
# NaN. This handles NaNs (which in some sense is the same as a constant
# return series, i.e., vola = 0)
adj = clip((returns / vola), clip_at=clip_at) # if vola is zero, adj is NaN
adj = adj.fillna(0.0)

# center the adj returns again
adj, adj_mean = center(adj, halflife=mu_halflife2, min_periods=0, mean_adj=mean)
# if mean:
# print(adj)
# print(adj_mean)
# assert False

m = pd.Series(np.zeros_like(returns.shape[1]), index=returns.columns)

for t, matrix in _ewma_cov(
data=adj, halflife=cov_halflife, min_periods=min_periods_cov
data=adj,
halflife=cov_halflife,
min_periods=min_periods_cov,
):
if mean:
m = scale_mean(
vola=vola.loc[t].values, vec1=returns_mean.loc[t], vec2=adj_mean.loc[t]
)

else:
m = pd.Series(np.zeros_like(returns.shape[1]), index=returns.columns)
yield IEWMA(
time=t,
mean=m,
Expand All @@ -164,6 +195,7 @@ def _ewma_cov(data, halflife, min_periods=0):
param data: Txn pandas DataFrame of returns
param halflife: float, halflife of the EWMA
"""

for t, ewma in _general(
data.values,
times=data.index,
Expand Down Expand Up @@ -191,8 +223,6 @@ def _ewma_mean(data, halflife, min_periods=0, clip_at=None):
def _general(
y,
times,
# com: Union[float, None] = None,
# span: Union[float, None] = None,
halflife: float | TimedeltaConvertibleTypes | None = None,
alpha: float | None = None,
fct=lambda x: x,
Expand All @@ -202,12 +232,12 @@ def _general(
"""
y: frame with measurements for times t=t_1,t_2,...,T
halflife: EWMA half life
alpha: EWMA alpha
fct: function to apply to y
min_periods: minimum number of observations to start EWMA
clip_at: clip y_last at +- clip_at*EWMA (optional)
returns: list of EWMAs for times t=t_1,t_2,...,T
serving as predictions for the following timestamp
The function returns a generator over
t_i, EWMA of fct(y_i)
returns: a generator over (t_i, EWMA of fct(y_i))
"""

def f(k):
Expand All @@ -216,12 +246,6 @@ def f(k):

return times[k], _ewma

# if com:
# alpha = 1 / (1 + com)

# if span:
# alpha = 2 / (span + 1)

if halflife:
alpha = 1 - np.exp(-np.log(2) / halflife)

Expand All @@ -233,13 +257,17 @@ def f(k):

# iterate through all the remaining rows of y. Start in the 2nd row
for n, row in enumerate(y[1:], start=1):
# replace NaNs in row with non-NaN values from _ewma and vice versa

next_val = fct(row)

# update the moving average
if clip_at and n >= min_periods + 1:
_ewma = _ewma + (1 - beta) * (
np.clip(fct(row), -clip_at * _ewma, clip_at * _ewma) - _ewma
np.clip(next_val, -clip_at * _ewma, clip_at * _ewma) - _ewma
) / (1 - np.power(beta, n + 1))
else:
_ewma = _ewma + (1 - beta) * (fct(row) - _ewma) / (
_ewma = _ewma + (1 - beta) * (next_val - _ewma) / (
1 - np.power(beta, n + 1)
)

Expand Down
Loading

0 comments on commit 2f0ed1f

Please sign in to comment.