Ex Japan Asia ETF Arbitrage

Any ideas, please, on how to improve alpha and reduce drawdowns?

10 responses

A more realistic backtest with transaction costs of 1 cent per share, using schedule_function to trade at market open.

61
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
 Returns 1 Month 3 Month 6 Month 12 Month
 Alpha 1 Month 3 Month 6 Month 12 Month
 Beta 1 Month 3 Month 6 Month 12 Month
 Sharpe 1 Month 3 Month 6 Month 12 Month
 Sortino 1 Month 3 Month 6 Month 12 Month
 Volatility 1 Month 3 Month 6 Month 12 Month
 Max Drawdown 1 Month 3 Month 6 Month 12 Month
import math
import numpy as np
import datetime
import pandas as pd
from pytz import timezone
from sklearn.decomposition import PCA
from zipline.utils import tradingcalendar as calendar
import statsmodels.api as sm
from statsmodels import tsa

class LogReturnSeries:
    """Accumulate 5-bar log returns, one column per price series.

    NOTE(review): in the source the loop that builds the returns sat directly
    in the class body and read an undefined ``prices``; its ``def`` line
    appears to have been lost. It is restored here as ``add(prices)`` -- the
    method name is a reconstruction, confirm it against the callers.
    """

    def __init__(self, t):
        self.T = t  # stored as given; not read anywhere inside this class
        self.log_returns = None  # 2-D array, one column per added series

    def add(self, prices):
        """Append one column of 5-bar log returns computed from ``prices``.

        A return is recorded as 0 when the price five bars earlier is 0, so
        the division never hits a zero denominator.
        """
        log_prices = []
        for ii in range(5, len(prices)):
            if prices[ii - 5] == 0:
                lg = 0
            else:
                lg = math.log(prices[ii] / prices[ii - 5])
            log_prices.append([lg])

        column = np.array(log_prices)
        if self.log_returns is None:
            self.log_returns = column
        else:
            # hstack requires every added series to have the same length.
            self.log_returns = np.hstack((self.log_returns, column))

    def get_series(self):
        """Return a copy of the accumulated returns as a 2-D array."""
        return np.copy(np.matrix(self.log_returns))

class Strategy:
    """Signal generator based on principal-component regression.

    Each security's log returns are regressed on the PCA factors of the whole
    return matrix; the latest return is then compared with the fitted value
    and with bands built from the coefficient confidence bounds.
    """

    def __init__(self, T, e):
        self.T = T  # forwarded to LogReturnSeries
        self.K = e  # passed to PCA as its first argument
        self.N = 100  # number of securities; overwritten by calculate()
        self.log_returns = LogReturnSeries(T)

    def compute_betas_by_ols(self, R, d):
        """Regress each of the first ``self.N`` columns of ``R`` on ``d``.

        Returns a tuple of eight parallel lists, in this order: lower bounds,
        upper bounds, second lower bounds, second upper bounds, last fitted
        values, F-test p-values, R-squared values, fitted-value slopes.
        """
        # compute_betas_by_ols_stock yields its statistics in exactly the
        # order the lists are returned, so they can be filled positionally.
        columns = [[] for _ in range(8)]
        for ii in range(self.N):
            stats = self.compute_betas_by_ols_stock(d, R[:, ii])
            for column, value in zip(columns, stats):
                column.append(value)
        return tuple(columns)

    def compute_betas_by_ols_stock(self, Dm, Fm):
        """Fit ``Fm`` on ``Dm`` by OLS and return ``(l, u, le, ue, f, p, r, s)``.

        ``l, u`` and ``le, ue`` are lower/upper coefficient bounds (both pairs
        use ``conf_int(0.05)`` here), ``f`` is the last fitted value, ``p``
        the F-statistic p-value, ``r`` the R-squared and ``s`` the change
        between the last two fitted values.
        """
        ols = sm.OLS(Fm, Dm).fit()
        l, u = ols.conf_int(0.05).T
        le, ue = ols.conf_int(0.05).T
        f = ols.fittedvalues[-1]
        s = ols.fittedvalues[-1] - ols.fittedvalues[-2]
        p = ols.f_pvalue
        # ``r`` was returned but never assigned in the source (NameError).
        # Callers store it as r_squared and threshold it, so the fit's
        # R-squared is used. NOTE(review): confirm plain vs. adjusted.
        r = ols.rsquared
        return l, u, le, ue, f, p, r, s

    def calculate(self, N):
        """Return a list of ``N`` signals (1, -1 or 0), or None if unusable.

        None is returned when the returns or the PCA scores contain NaN/inf,
        or when the coefficient and factor shapes do not line up.
        """
        self.N = N
        ret = self.log_returns.get_series()

        # Reject bad input before fitting so PCA never sees NaN/inf.
        if not np.isfinite(ret).all():
            return None
        d = PCA(self.K).fit_transform(ret)
        if not np.isfinite(d).all():
            return None

        # The second pair of bounds is returned but not used by the rules.
        l, u, _les, _ues, f, pv, rs, sl = self.compute_betas_by_ols(ret, d)

        dr = np.matrix(d[-1]).T  # latest factor scores as a column vector
        signal = []

        for ii in range(N):
            # NOTE(review): ``[1:]`` drops the first coefficient as if it were
            # an intercept, yet no constant column is added to the regressors
            # in compute_betas_by_ols_stock -- confirm whether an
            # ``sm.add_constant`` step is missing.
            lower_coef = np.matrix(l[ii][1:])
            upper_coef = np.matrix(u[ii][1:])
            if np.shape(lower_coef) != np.shape(dr):
                return None
            if np.shape(upper_coef) != np.shape(dr):
                return None

            if rs[ii] < 0.9 or pv[ii] > 0.01:
                # Poor fit: stay flat on this security only. The source did a
                # bare ``return`` here, which threw away the whole signal list
                # (including the 0 just appended).
                signal.append(0)
                continue

            r = ret[-1, ii]
            fit = f[ii]
            slope = sl[ii]
            lower = (lower_coef * dr + l[ii])[0, 0]
            upper = (upper_coef * dr + u[ii])[0, 0]

            if r > fit and slope < 0:
                signal.append(1)
            elif r > fit and slope > 0 and r < upper:
                signal.append(-1)
            elif r > fit and slope > 0 and r > upper:
                signal.append(1)
            elif r < fit and slope < 0 and r > lower:
                signal.append(1)
            elif r < fit and slope < 0 and r < lower:
                signal.append(-1)
            elif r < fit and slope > 0 and r > lower:
                signal.append(-1)
            elif r < fit and slope > 0 and r < lower:
                signal.append(1)
            else:
                signal.append(0)

        return signal

def initialize(context):
    """Set up the ETF universe, the benchmark and the daily trading schedule."""
    set_symbol_lookup_date('2014-12-01')
    context.symbols = symbols(
        'EWT', 'AAXJ', 'VPL', 'EPP', 'EWA', 'EWS', 'GMF', 'IPAC',
        'ENZL', 'DVYA', 'AUSE', 'AXJL', 'PAF', 'FHK', 'FPA', 'FTW',
        'EWSS', 'DXKW', 'QAUS', 'DBAP', 'KROO', 'QTWN', 'AXJS', 'AXJV',
        'HKOR', 'DBKO', 'GMFS', 'FKO', 'QKOR', 'EWAS', 'FAUS',
    )
    context.AAXJ = sid(36750)  # presumably AAXJ, going by the attribute name
    set_benchmark(context.AAXJ)
    # Run the strategy every trading day, five minutes after the open.
    schedule_function(
        func=myfunc,
        date_rule=date_rules.every_day(),
        time_rule=time_rules.market_open(hours=0, minutes=5),
    )

def handle_data(context, data):
    """Do nothing per bar; trading is driven by the scheduled ``myfunc``."""
    pass

def myfunc(context, data):
    """Compute the day's signals and rebalance each security accordingly.

    Registered through ``schedule_function`` in ``initialize``. A signal of 1
    targets a +60000 position, -1 targets -60000, and anything else closes an
    existing position on the wrong side.
    """
    context.counter = 0
    pricehistory = history(bar_count=51, frequency='1d', field='close_price')
    strategy = Strategy(50, 5)

    # Only securities with a complete price history take part.
    # NOTE(review): the body of the NaN branch was empty in the source (a
    # syntax error) and nothing visible hands ``pricehistory`` to
    # ``strategy.log_returns``; a line feeding the prices appears to have been
    # lost here -- restore it before relying on ``strategy.calculate``.
    valid_sids = [
        security for security in data
        if not np.isnan(pricehistory[security].values).any()
    ]

    signal = strategy.calculate(len(valid_sids))
    if signal is None:
        return

    # ``signal`` has one entry per valid security, so walk that same list
    # instead of indexing it by position over every key in ``data``
    # (assumes the returns are fed in this order -- TODO confirm).
    for security, sig in zip(valid_sids, signal):
        amount = context.portfolio.positions[security].amount
        if sig == 1 and amount <= 0:
            order_target_value(security, 60000)
        elif sig == -1 and amount >= 0:
            order_target_value(security, -60000)
        else:
            if amount > 0 and sig < 1:
                order_target(security, 0)
            if amount < 0 and sig > -1:
                order_target(security, 0)

    record(p=context.portfolio.capital_used)
There was a runtime error.

And finally, an optimized version.

61
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
 Returns 1 Month 3 Month 6 Month 12 Month
 Alpha 1 Month 3 Month 6 Month 12 Month
 Beta 1 Month 3 Month 6 Month 12 Month
 Sharpe 1 Month 3 Month 6 Month 12 Month
 Sortino 1 Month 3 Month 6 Month 12 Month
 Volatility 1 Month 3 Month 6 Month 12 Month
 Max Drawdown 1 Month 3 Month 6 Month 12 Month
import math
import numpy as np
import datetime
import pandas as pd
from pytz import timezone
from sklearn.decomposition import PCA
from zipline.utils import tradingcalendar as calendar
import statsmodels.api as sm
from statsmodels import tsa

class LogReturnSeries:
    """Accumulate 5-bar log returns, one column per price series.

    NOTE(review): in the source the loop that builds the returns sat directly
    in the class body and read an undefined ``prices``; its ``def`` line
    appears to have been lost. It is restored here as ``add(prices)`` -- the
    method name is a reconstruction, confirm it against the callers.
    """

    def __init__(self, t):
        self.T = t  # stored as given; not read anywhere inside this class
        self.log_returns = None  # 2-D array, one column per added series

    def add(self, prices):
        """Append one column of 5-bar log returns computed from ``prices``.

        A return is recorded as 0 when the price five bars earlier is 0, so
        the division never hits a zero denominator.
        """
        log_prices = []
        for ii in range(5, len(prices)):
            if prices[ii - 5] == 0:
                lg = 0
            else:
                lg = math.log(prices[ii] / prices[ii - 5])
            log_prices.append([lg])

        column = np.array(log_prices)
        if self.log_returns is None:
            self.log_returns = column
        else:
            # hstack requires every added series to have the same length.
            self.log_returns = np.hstack((self.log_returns, column))

    def get_series(self):
        """Return a copy of the accumulated returns as a 2-D array."""
        return np.copy(np.matrix(self.log_returns))

class Strategy:
    """Signal generator based on principal-component regression.

    Each security's log returns are regressed on the PCA factors of the whole
    return matrix; the latest return is then compared with the fitted value
    and with bands built from the coefficient confidence bounds.
    """

    def __init__(self, T, e):
        self.T = T  # forwarded to LogReturnSeries
        self.K = e  # passed to PCA as its first argument
        self.N = 100  # number of securities; overwritten by calculate()
        self.log_returns = LogReturnSeries(T)

    def compute_betas_by_ols(self, R, d):
        """Regress each of the first ``self.N`` columns of ``R`` on ``d``.

        Returns a tuple of eight parallel lists, in this order: lower bounds,
        upper bounds, second lower bounds, second upper bounds, last fitted
        values, F-test p-values, R-squared values, fitted-value slopes.
        """
        # compute_betas_by_ols_stock yields its statistics in exactly the
        # order the lists are returned, so they can be filled positionally.
        columns = [[] for _ in range(8)]
        for ii in range(self.N):
            stats = self.compute_betas_by_ols_stock(d, R[:, ii])
            for column, value in zip(columns, stats):
                column.append(value)
        return tuple(columns)

    def compute_betas_by_ols_stock(self, Dm, Fm):
        """Fit ``Fm`` on ``Dm`` by OLS and return ``(l, u, le, ue, f, p, r, s)``.

        ``l, u`` are coefficient bounds from ``conf_int(0.01)`` and ``le, ue``
        from ``conf_int(0.05)``; ``f`` is the last fitted value, ``p`` the
        F-statistic p-value, ``r`` the R-squared and ``s`` the change between
        the last two fitted values.
        """
        ols = sm.OLS(Fm, Dm).fit()
        l, u = ols.conf_int(0.01).T
        le, ue = ols.conf_int(0.05).T
        f = ols.fittedvalues[-1]
        s = ols.fittedvalues[-1] - ols.fittedvalues[-2]
        p = ols.f_pvalue
        # ``r`` was returned but never assigned in the source (NameError).
        # Callers store it as r_squared and threshold it, so the fit's
        # R-squared is used. NOTE(review): confirm plain vs. adjusted.
        r = ols.rsquared
        return l, u, le, ue, f, p, r, s

    def calculate(self, N):
        """Return a list of ``N`` signals (1, -1 or 0), or None if unusable.

        None is returned when the returns or the PCA scores contain NaN/inf,
        or when the coefficient and factor shapes do not line up.
        """
        self.N = N
        ret = self.log_returns.get_series()

        # Reject bad input before fitting so PCA never sees NaN/inf.
        if not np.isfinite(ret).all():
            return None
        d = PCA(self.K).fit_transform(ret)
        if not np.isfinite(d).all():
            return None

        # The second pair of bounds is returned but not used by the rules.
        l, u, _les, _ues, f, pv, rs, sl = self.compute_betas_by_ols(ret, d)

        dr = np.matrix(d[-1]).T  # latest factor scores as a column vector
        signal = []

        for ii in range(N):
            # NOTE(review): ``[1:]`` drops the first coefficient as if it were
            # an intercept, yet no constant column is added to the regressors
            # in compute_betas_by_ols_stock -- confirm whether an
            # ``sm.add_constant`` step is missing.
            lower_coef = np.matrix(l[ii][1:])
            upper_coef = np.matrix(u[ii][1:])
            if np.shape(lower_coef) != np.shape(dr):
                return None
            if np.shape(upper_coef) != np.shape(dr):
                return None

            if rs[ii] < 0.8 or pv[ii] > 0.01:
                # Poor fit: stay flat on this security only. The source did a
                # bare ``return`` here, which threw away the whole signal list
                # (including the 0 just appended).
                signal.append(0)
                continue

            r = ret[-1, ii]
            fit = f[ii]
            slope = sl[ii]
            lower = (lower_coef * dr + l[ii])[0, 0]
            upper = (upper_coef * dr + u[ii])[0, 0]

            if r > fit and slope < 0 and r > upper:
                signal.append(1)
            elif r > fit and slope > 0 and r < upper:
                signal.append(1)
            elif r < fit and slope < 0 and r > lower:
                signal.append(-1)
            elif r < fit and slope < 0 and r < lower:
                signal.append(1)
            elif r < fit and slope > 0 and r > lower:
                signal.append(1)
            else:
                signal.append(0)

        return signal

def initialize(context):
    """Set up the ETF universe, the benchmark and the daily trading schedule."""
    set_symbol_lookup_date('2014-12-01')
    context.symbols = symbols(
        'EWT', 'AAXJ', 'VPL', 'EPP', 'EWA', 'EWS', 'GMF', 'IPAC',
        'ENZL', 'DVYA', 'AUSE', 'AXJL', 'PAF', 'FHK', 'FPA', 'FTW',
        'EWSS', 'DXKW', 'QAUS', 'DBAP', 'KROO', 'QTWN', 'AXJS', 'AXJV',
        'HKOR', 'DBKO', 'GMFS', 'FKO', 'QKOR', 'EWAS', 'FAUS',
    )
    context.AAXJ = sid(36750)  # presumably AAXJ, going by the attribute name
    set_benchmark(context.AAXJ)
    # Run the strategy every trading day, five minutes after the open.
    schedule_function(
        func=myfunc,
        date_rule=date_rules.every_day(),
        time_rule=time_rules.market_open(hours=0, minutes=5),
    )

def handle_data(context, data):
    """Do nothing per bar; trading is driven by the scheduled ``myfunc``."""
    pass

def myfunc(context, data):
    """Compute the day's signals and rebalance each security accordingly.

    Registered through ``schedule_function`` in ``initialize``. A signal of 1
    targets a +60000 position, -1 targets -60000, and anything else closes an
    existing position on the wrong side.
    """
    context.counter = 0
    pricehistory = history(bar_count=41, frequency='1d', field='close_price')
    strategy = Strategy(40, 0.95)

    # Only securities with a complete price history take part.
    # NOTE(review): the body of the NaN branch was empty in the source (a
    # syntax error) and nothing visible hands ``pricehistory`` to
    # ``strategy.log_returns``; a line feeding the prices appears to have been
    # lost here -- restore it before relying on ``strategy.calculate``.
    valid_sids = [
        security for security in data
        if not np.isnan(pricehistory[security].values).any()
    ]

    signal = strategy.calculate(len(valid_sids))
    if signal is None:
        return

    # ``signal`` has one entry per valid security, so walk that same list
    # instead of indexing it by position over every key in ``data``
    # (assumes the returns are fed in this order -- TODO confirm).
    for security, sig in zip(valid_sids, signal):
        amount = context.portfolio.positions[security].amount
        if sig == 1 and amount <= 0:
            order_target_value(security, 60000)
        elif sig == -1 and amount >= 0:
            order_target_value(security, -60000)
        else:
            if amount > 0 and sig < 1:
                order_target(security, 0)
            if amount < 0 and sig > -1:
                order_target(security, 0)

    record(p=context.portfolio.capital_used)
There was a runtime error.

Thanks Pavy,

Would you be willing to provide a descriptive outline of this algorithm? Also, is it based on a published strategy? If so, is there an accessible reference? How did you pick the list of securities? Etc.

def handle_data(context, data):
    """Do nothing per bar; the work is done by the scheduled function."""
    pass


Normally, I put code in handle_data, but I can see the advantage of structuring the algorithm as you did. I gather that myfunc(context, data) takes its place, and is called according to schedule_function. Is this correct?

Grant

Here is the result when run from July 2008 (caveat: some of these ETFs didn't exist then).

0
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
 Returns 1 Month 3 Month 6 Month 12 Month
 Alpha 1 Month 3 Month 6 Month 12 Month
 Beta 1 Month 3 Month 6 Month 12 Month
 Sharpe 1 Month 3 Month 6 Month 12 Month
 Sortino 1 Month 3 Month 6 Month 12 Month
 Volatility 1 Month 3 Month 6 Month 12 Month
 Max Drawdown 1 Month 3 Month 6 Month 12 Month
import math
import numpy as np
import datetime
import pandas as pd
from pytz import timezone
from sklearn.decomposition import PCA
from zipline.utils import tradingcalendar as calendar
import statsmodels.api as sm
from statsmodels import tsa

class LogReturnSeries:
    """Accumulate 5-bar log returns, one column per price series.

    NOTE(review): in the source the loop that builds the returns sat directly
    in the class body and read an undefined ``prices``; its ``def`` line
    appears to have been lost. It is restored here as ``add(prices)`` -- the
    method name is a reconstruction, confirm it against the callers.
    """

    def __init__(self, t):
        self.T = t  # stored as given; not read anywhere inside this class
        self.log_returns = None  # 2-D array, one column per added series

    def add(self, prices):
        """Append one column of 5-bar log returns computed from ``prices``.

        A return is recorded as 0 when the price five bars earlier is 0, so
        the division never hits a zero denominator.
        """
        log_prices = []
        for ii in range(5, len(prices)):
            if prices[ii - 5] == 0:
                lg = 0
            else:
                lg = math.log(prices[ii] / prices[ii - 5])
            log_prices.append([lg])

        column = np.array(log_prices)
        if self.log_returns is None:
            self.log_returns = column
        else:
            # hstack requires every added series to have the same length.
            self.log_returns = np.hstack((self.log_returns, column))

    def get_series(self):
        """Return a copy of the accumulated returns as a 2-D array."""
        return np.copy(np.matrix(self.log_returns))

class Strategy:
    """Signal generator based on principal-component regression.

    Each security's log returns are regressed on the PCA factors of the whole
    return matrix; the latest return is then compared with the fitted value
    and with bands built from the coefficient confidence bounds.
    """

    def __init__(self, T, e):
        self.T = T  # forwarded to LogReturnSeries
        self.K = e  # passed to PCA as its first argument
        self.N = 100  # number of securities; overwritten by calculate()
        self.log_returns = LogReturnSeries(T)

    def compute_betas_by_ols(self, R, d):
        """Regress each of the first ``self.N`` columns of ``R`` on ``d``.

        Returns a tuple of eight parallel lists, in this order: lower bounds,
        upper bounds, second lower bounds, second upper bounds, last fitted
        values, F-test p-values, R-squared values, fitted-value slopes.
        """
        # compute_betas_by_ols_stock yields its statistics in exactly the
        # order the lists are returned, so they can be filled positionally.
        columns = [[] for _ in range(8)]
        for ii in range(self.N):
            stats = self.compute_betas_by_ols_stock(d, R[:, ii])
            for column, value in zip(columns, stats):
                column.append(value)
        return tuple(columns)

    def compute_betas_by_ols_stock(self, Dm, Fm):
        """Fit ``Fm`` on ``Dm`` by OLS and return ``(l, u, le, ue, f, p, r, s)``.

        ``l, u`` are coefficient bounds from ``conf_int(0.01)`` and ``le, ue``
        from ``conf_int(0.05)``; ``f`` is the last fitted value, ``p`` the
        F-statistic p-value, ``r`` the R-squared and ``s`` the change between
        the last two fitted values.
        """
        ols = sm.OLS(Fm, Dm).fit()
        l, u = ols.conf_int(0.01).T
        le, ue = ols.conf_int(0.05).T
        f = ols.fittedvalues[-1]
        s = ols.fittedvalues[-1] - ols.fittedvalues[-2]
        p = ols.f_pvalue
        # ``r`` was returned but never assigned in the source (NameError).
        # Callers store it as r_squared and threshold it, so the fit's
        # R-squared is used. NOTE(review): confirm plain vs. adjusted.
        r = ols.rsquared
        return l, u, le, ue, f, p, r, s

    def calculate(self, N):
        """Return a list of ``N`` signals (1, -1 or 0), or None if unusable.

        None is returned when the returns or the PCA scores contain NaN/inf,
        or when the coefficient and factor shapes do not line up.
        """
        self.N = N
        ret = self.log_returns.get_series()

        # Reject bad input before fitting so PCA never sees NaN/inf.
        if not np.isfinite(ret).all():
            return None
        d = PCA(self.K).fit_transform(ret)
        if not np.isfinite(d).all():
            return None

        # The second pair of bounds is returned but not used by the rules.
        l, u, _les, _ues, f, pv, rs, sl = self.compute_betas_by_ols(ret, d)

        dr = np.matrix(d[-1]).T  # latest factor scores as a column vector
        signal = []

        for ii in range(N):
            # NOTE(review): ``[1:]`` drops the first coefficient as if it were
            # an intercept, yet no constant column is added to the regressors
            # in compute_betas_by_ols_stock -- confirm whether an
            # ``sm.add_constant`` step is missing.
            lower_coef = np.matrix(l[ii][1:])
            upper_coef = np.matrix(u[ii][1:])
            if np.shape(lower_coef) != np.shape(dr):
                return None
            if np.shape(upper_coef) != np.shape(dr):
                return None

            if rs[ii] < 0.8 or pv[ii] > 0.01:
                # Poor fit: stay flat on this security only. The source did a
                # bare ``return`` here, which threw away the whole signal list
                # (including the 0 just appended).
                signal.append(0)
                continue

            r = ret[-1, ii]
            fit = f[ii]
            slope = sl[ii]
            lower = (lower_coef * dr + l[ii])[0, 0]
            upper = (upper_coef * dr + u[ii])[0, 0]

            if r > fit and slope < 0 and r > upper:
                signal.append(1)
            elif r > fit and slope > 0 and r < upper:
                signal.append(1)
            elif r < fit and slope < 0 and r > lower:
                signal.append(-1)
            elif r < fit and slope < 0 and r < lower:
                signal.append(1)
            elif r < fit and slope > 0 and r > lower:
                signal.append(1)
            else:
                signal.append(0)

        return signal

def initialize(context):
    """Set up the ETF universe, the benchmark and the daily trading schedule."""
    set_symbol_lookup_date('2014-12-01')
    context.symbols = symbols(
        'EWT', 'AAXJ', 'VPL', 'EPP', 'EWA', 'EWS', 'GMF', 'IPAC',
        'ENZL', 'DVYA', 'AUSE', 'AXJL', 'PAF', 'FHK', 'FPA', 'FTW',
        'EWSS', 'DXKW', 'QAUS', 'DBAP', 'KROO', 'QTWN', 'AXJS', 'AXJV',
        'HKOR', 'DBKO', 'GMFS', 'FKO', 'QKOR', 'EWAS', 'FAUS',
    )
    context.AAXJ = sid(36750)  # presumably AAXJ, going by the attribute name
    set_benchmark(context.AAXJ)
    # Run the strategy every trading day, five minutes after the open.
    schedule_function(
        func=myfunc,
        date_rule=date_rules.every_day(),
        time_rule=time_rules.market_open(hours=0, minutes=5),
    )

def handle_data(context, data):
    """Do nothing per bar; trading is driven by the scheduled ``myfunc``."""
    pass

def myfunc(context, data):
    """Compute the day's signals and rebalance each security accordingly.

    Registered through ``schedule_function`` in ``initialize``. A signal of 1
    targets a +60000 position, -1 targets -60000, and anything else closes an
    existing position on the wrong side.
    """
    context.counter = 0
    pricehistory = history(bar_count=41, frequency='1d', field='close_price')
    strategy = Strategy(40, 0.95)

    # Only securities with a complete price history take part.
    # NOTE(review): the body of the NaN branch was empty in the source (a
    # syntax error) and nothing visible hands ``pricehistory`` to
    # ``strategy.log_returns``; a line feeding the prices appears to have been
    # lost here -- restore it before relying on ``strategy.calculate``.
    valid_sids = [
        security for security in data
        if not np.isnan(pricehistory[security].values).any()
    ]

    signal = strategy.calculate(len(valid_sids))
    if signal is None:
        return

    # ``signal`` has one entry per valid security, so walk that same list
    # instead of indexing it by position over every key in ``data``
    # (assumes the returns are fed in this order -- TODO confirm).
    for security, sig in zip(valid_sids, signal):
        amount = context.portfolio.positions[security].amount
        if sig == 1 and amount <= 0:
            order_target_value(security, 60000)
        elif sig == -1 and amount >= 0:
            order_target_value(security, -60000)
        else:
            if amount > 0 and sig < 1:
                order_target(security, 0)
            if amount < 0 and sig > -1:
                order_target(security, 0)

    record(p=context.portfolio.capital_used)
There was a runtime error.

Alpha. Hmm, maybe that's what "seekingalpha" refers to. Feel free to describe what it means, maybe an example. Quote following from thestreet.com and I don't understand it well if at all:

""Alpha" tells you how a fund is actually doing compared to its "beta" (a volatility measure that is supposed to give you some sense of how far the fund will fall if the market takes a dive and how high the fund will rise if the bull starts to climb). If the beta is 1.5, and the fund rises 15% more than the market, then the alpha is zero."

1
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
 Returns 1 Month 3 Month 6 Month 12 Month
 Alpha 1 Month 3 Month 6 Month 12 Month
 Beta 1 Month 3 Month 6 Month 12 Month
 Sharpe 1 Month 3 Month 6 Month 12 Month
 Sortino 1 Month 3 Month 6 Month 12 Month
 Volatility 1 Month 3 Month 6 Month 12 Month
 Max Drawdown 1 Month 3 Month 6 Month 12 Month
import math
import numpy as np
import datetime
import pandas as pd
from pytz import timezone
from sklearn.decomposition import PCA
from zipline.utils import tradingcalendar as calendar
import statsmodels.api as sm
from statsmodels import tsa

class LogReturnSeries:
    """Accumulate 5-bar log returns, one column per price series.

    NOTE(review): in the source the loop that builds the returns sat directly
    in the class body and read an undefined ``prices``; its ``def`` line
    appears to have been lost. It is restored here as ``add(prices)`` -- the
    method name is a reconstruction, confirm it against the callers.
    """

    def __init__(self, t):
        self.T = t  # stored as given; not read anywhere inside this class
        self.log_returns = None  # 2-D array, one column per added series

    def add(self, prices):
        """Append one column of 5-bar log returns computed from ``prices``.

        A return is recorded as 0 when the price five bars earlier is 0, so
        the division never hits a zero denominator.
        """
        log_prices = []
        for ii in range(5, len(prices)):
            if prices[ii - 5] == 0:
                lg = 0
            else:
                lg = math.log(prices[ii] / prices[ii - 5])
            log_prices.append([lg])

        column = np.array(log_prices)
        if self.log_returns is None:
            self.log_returns = column
        else:
            # hstack requires every added series to have the same length.
            self.log_returns = np.hstack((self.log_returns, column))

    def get_series(self):
        """Return a copy of the accumulated returns as a 2-D array."""
        return np.copy(np.matrix(self.log_returns))

class Strategy:
    """Signal generator based on principal-component regression.

    Each security's log returns are regressed on the PCA factors of the whole
    return matrix; the latest return is then compared with the fitted value
    and with bands built from the coefficient confidence bounds.
    """

    def __init__(self, T, e):
        self.T = T  # forwarded to LogReturnSeries
        self.K = e  # passed to PCA as its first argument
        self.N = 100  # number of securities; overwritten by calculate()
        self.log_returns = LogReturnSeries(T)

    def compute_betas_by_ols(self, R, d):
        """Regress each of the first ``self.N`` columns of ``R`` on ``d``.

        Returns a tuple of eight parallel lists, in this order: lower bounds,
        upper bounds, second lower bounds, second upper bounds, last fitted
        values, F-test p-values, R-squared values, fitted-value slopes.
        """
        # compute_betas_by_ols_stock yields its statistics in exactly the
        # order the lists are returned, so they can be filled positionally.
        columns = [[] for _ in range(8)]
        for ii in range(self.N):
            stats = self.compute_betas_by_ols_stock(d, R[:, ii])
            for column, value in zip(columns, stats):
                column.append(value)
        return tuple(columns)

    def compute_betas_by_ols_stock(self, Dm, Fm):
        """Fit ``Fm`` on ``Dm`` by OLS and return ``(l, u, le, ue, f, p, r, s)``.

        ``l, u`` are coefficient bounds from ``conf_int(0.01)`` and ``le, ue``
        from ``conf_int(0.05)``; ``f`` is the last fitted value, ``p`` the
        F-statistic p-value, ``r`` the R-squared and ``s`` the change between
        the last two fitted values.
        """
        ols = sm.OLS(Fm, Dm).fit()
        l, u = ols.conf_int(0.01).T
        le, ue = ols.conf_int(0.05).T
        f = ols.fittedvalues[-1]
        s = ols.fittedvalues[-1] - ols.fittedvalues[-2]
        p = ols.f_pvalue
        # ``r`` was returned but never assigned in the source (NameError).
        # Callers store it as r_squared and threshold it, so the fit's
        # R-squared is used. NOTE(review): confirm plain vs. adjusted.
        r = ols.rsquared
        return l, u, le, ue, f, p, r, s

    def calculate(self, N):
        """Return a list of ``N`` signals (1, -1 or 0), or None if unusable.

        None is returned when the returns or the PCA scores contain NaN/inf,
        or when the coefficient and factor shapes do not line up.
        """
        self.N = N
        ret = self.log_returns.get_series()

        # Reject bad input before fitting so PCA never sees NaN/inf.
        if not np.isfinite(ret).all():
            return None
        d = PCA(self.K).fit_transform(ret)
        if not np.isfinite(d).all():
            return None

        # The second pair of bounds is returned but not used by the rules.
        l, u, _les, _ues, f, pv, rs, sl = self.compute_betas_by_ols(ret, d)

        dr = np.matrix(d[-1]).T  # latest factor scores as a column vector
        signal = []

        for ii in range(N):
            # NOTE(review): ``[1:]`` drops the first coefficient as if it were
            # an intercept, yet no constant column is added to the regressors
            # in compute_betas_by_ols_stock -- confirm whether an
            # ``sm.add_constant`` step is missing.
            lower_coef = np.matrix(l[ii][1:])
            upper_coef = np.matrix(u[ii][1:])
            if np.shape(lower_coef) != np.shape(dr):
                return None
            if np.shape(upper_coef) != np.shape(dr):
                return None

            if rs[ii] < 0.8 or pv[ii] > 0.01:
                # Poor fit: stay flat on this security only. The source did a
                # bare ``return`` here, which threw away the whole signal list
                # (including the 0 just appended).
                signal.append(0)
                continue

            r = ret[-1, ii]
            fit = f[ii]
            slope = sl[ii]
            lower = (lower_coef * dr + l[ii])[0, 0]
            upper = (upper_coef * dr + u[ii])[0, 0]

            if r > fit and slope < 0 and r > upper:
                signal.append(1)
            elif r > fit and slope > 0 and r < upper:
                signal.append(1)
            elif r < fit and slope < 0 and r > lower:
                signal.append(-1)
            elif r < fit and slope < 0 and r < lower:
                signal.append(1)
            elif r < fit and slope > 0 and r > lower:
                signal.append(1)
            else:
                signal.append(0)

        return signal

def initialize(context):
    """Set up the ETF universe, benchmark, trading schedule and summary hook."""
    set_symbol_lookup_date('2014-12-01')
    context.symbols = symbols(
        'AAXJ', 'AUSE', 'AXJL', 'AXJS', 'AXJV', 'DBAP', 'DBKO', 'DVYA',
        'DXKW', 'ENZL', 'EPP', 'EWA', 'EWAS', 'EWS', 'EWSS', 'EWT',
        'FAUS', 'FHK', 'FKO', 'FPA', 'FTW', 'GMF', 'GMFS', 'HKOR',
        'IPAC', 'KROO', 'PAF', 'QAUS', 'QKOR', 'QTWN', 'VPL',
    )
    context.AAXJ = sid(36750)  # presumably AAXJ, going by the attribute name
    set_benchmark(context.AAXJ)
    # Run the strategy every trading day, five minutes after the open.
    schedule_function(
        func=myfunc,
        date_rule=date_rules.every_day(),
        time_rule=time_rules.market_open(hours=0, minutes=5),
    )

    # Securities that myfunc has marked; keyed by security, value 1.
    context.purchased = {}

    # Daily bookkeeping / run-summary logging.
    schedule_function(func=summary, date_rule=date_rules.every_day())

def handle_data(context, data):
    """Do nothing per bar; trading is driven by the scheduled ``myfunc``."""
    pass

def myfunc(context, data):
    """Compute the day's signals and trim or close long positions.

    Registered through ``schedule_function`` in ``initialize``. A signal of 1
    marks the security in ``context.purchased``; -1 sells up to 2000 shares
    of an existing long; 0 closes an existing long.
    """
    context.counter = 0
    pricehistory = history(bar_count=41, frequency='1d', field='close_price')
    strategy = Strategy(40, 0.95)

    # Only securities with a complete price history take part.
    # NOTE(review): the body of the NaN branch was empty in the source (a
    # syntax error) and nothing visible hands ``pricehistory`` to
    # ``strategy.log_returns``; a line feeding the prices appears to have been
    # lost here -- restore it before relying on ``strategy.calculate``.
    valid_sids = [
        security for security in data
        if not np.isnan(pricehistory[security].values).any()
    ]

    signal = strategy.calculate(len(valid_sids))
    if signal is None:
        return

    # ``signal`` has one entry per valid security, so walk that same list
    # instead of indexing it by position over every key in ``data``
    # (assumes the returns are fed in this order -- TODO confirm).
    for security, sig in zip(valid_sids, signal):
        shares = context.portfolio.positions[security].amount
        cash = context.portfolio.cash
        # NOTE(review): the per-security allocation is computed but no visible
        # statement places a buy order with it -- confirm whether one was lost.
        alloc = int(cash / len(data.keys()))  # allocation

        if sig == 1:
            context.purchased[security] = 1
        elif sig == -1 and shares > 0:
            sale = min(shares, 2000)
            order(security, -sale)
        elif sig > -1 and shares > 0:
            # The source also tested ``sig == 1`` inside this branch, which
            # could never be reached because that case is handled first.
            order_target(security, 0)

        if security not in context.purchased:
            # gh: Unthinkingly buying everything at least once.
            context.purchased[security] = 1

    record(p=context.portfolio.capital_used)

# NOTE(review): this function reached the file with its indentation stripped
# and with several statements collapsed onto single lines behind '#' comment
# markers, so it does not parse as written. The code below is left untouched;
# the notes only point at what has to be restored.
def summary(context, data):
'''
Summary processing

https://www.quantopian.com/posts/run-summary
'''
# Need a couple of imports, you might need to comment these out if already imported.
#   That's pretty much the only change that might be necessary.
###from pytz import timezone
import re

# Yes try/except is narly yet makes to work with set_universe etc.
# Is there a better way?  An -| if 'books' in context: |- didn't work.
try:
context['books']    # See if this key exists yet.
b = context.books   # For brevity.
except:
'''
Preparation. Initialize one time.
'''
cash = context.portfolio.starting_cash
# NOTE(review): the summary output below reads b['count_buy'], but no
# 'count_buy' key is created in this dictionary -- presumably lost.
context.books = {   # Starting cash value from GUI or live restart...
'cash'          : cash,
'init_cash'     : cash,
'cash_low'      : cash,
'shares'        : 0,
'shares_value'  : 0,
'count_sell'    : 0,       # Overall sell count.
'cnt_sel_evnts' : 0,
'summary_print' : 0,
'costs_total'   : 0,       # Commissions.
'sids_seen'     : [],      # For set_universe since dynamic.
'arena'         : None,    # To become 'backtest' or 'live'.
'mode'          : None,    # To become 'daily' or 'minute'.
'prep_prnt'     : '',
'orders'        : {        # Keep orders for accounting.
'open' : {},           # Orders not completely filled yet.
'syms' : {},           # Dict of symbols with open orders,
},                         #   can be used to not order over another.
}

b = context.books

# First/last dates and
#   Arena: backtest or live.  Mode: daily or minute.
try:
# Prep some environment info
# NOTE(review): re.findall returns a list, so the string comparisons of
# ``arena`` below can never be true as written; ``sid`` is not a local here.
arena     = re.findall('Live|Simulation', str(sid))
b['mode'] = re.findall('data_frequency=(.*?),', str(sid))

if arena == 'Live':
b['arena'] = 'live'
elif arena == 'Simulation':
b['arena'] = 'backtest'
except:
log.info('Error in str(sid), summary will not print.')
b['arena'] = 'Arena unknown'
b['mode']  = 'Mode unknown'

# Show environment at the beginning of the run
# NOTE(review): the format string has six placeholders but only four
# arguments are visible -- the date arguments appear to have been lost.
msg  = ' {0:s}\n  {1:s}  {2:s} to {3:s}  {4:s}  {5:s}\n'
b['prep_prnt'] = msg.format(
b['arena'],
b['mode'],
# NOTE(review): everything after the first '#' on the next two lines is a
# comment to the parser; the universe listing, per-security setup and order
# accounting collapsed there need to be split back into statements.
'   $' + "%.0f" % b['cash'], ' First bar stocks ({}) ...'.format(len(data)), ) # Show current universe once for sec in data: if isinstance(sec, basestring): continue # Skip any injected fetcher string keys. b['prep_prnt'] += (sec.symbol + ' ') log.info(b['prep_prnt']) ''' Prepare individual securities dictionaries with dynamic set_universe, fetcher, IPO's appearing etc. ''' for sec in data: if isinstance(sec, basestring): continue # Skip any injected fetcher string keys. sym = sec.symbol # Scenarios with price missing ... price = data[sec].price if 'price' in data[sec] else 0 if sym in b: continue if sec not in b['sids_seen']: b['sids_seen'].append(sec) b[sym] = { 'init_price' : price, # Save for summary. 'price' : price, # Most recent price. 'cash_low' : 0, # Lowest level of cash. 'balance' : 0, # For individual 'x' return. 'shares' : 0, 'count_buy' : 0, # Individual buy number of shares. 'count_sell' : 0, 'cnt_buy_evnts' : 0, # Individual buy events count. 'cnt_sel_evnts' : 0, } ''' Accounting. Update the numbers, manage orders if any. ''' accounting = {} # Locally, any orders ready to be counted. # Read open orders for security, oo_for_sid in get_open_orders().iteritems(): sym = security.symbol for order_obj in oo_for_sid: # Convenience option to be able in handle_data to # avoid ordering if an order already exists. b['orders']['syms'][sym] = 1 # If an order not seen before, add for tracking if order_obj.id not in b['orders']['open']: b['orders']['open'][order_obj.id] = order_obj.filled # Take a look at current orders for id in b['orders']['open']: o = get_order(id) # Current order, might be updated. # If filled is not zero, account for it if o.filled != 0: accounting[id] = o # Set to account for filled. # Bugbug: The only way I could make sense of things so far ... # If filled is not amount (shares), that's a partial fill, # cancelling remainder to simplify life. # ToDo: Not sure of official actual fill prices. 
if o.filled != o.amount: cancel_order(id) # You might want to change/remove this. # Do any accounting, into books{} for id in accounting: sec = accounting[id]['sid'] sym = sec.symbol b[sym]['price'] = data[sec].price if 'price' in data[sec] else b[sym]['price'] commission = accounting[id]['commission'] filled = accounting[id]['filled'] # Number filled, sell neg. lkp = b[sym]['price'] # Last known price. transaction = filled * lkp b[sym]['shares'] += filled # The transaction on sell is negative b[sym]['balance'] -= transaction # so this line adds to balance then. b[sym]['balance'] -= commission b['costs_total'] += commission if filled > 0: # Buy b[sym]['cnt_buy_evnts'] += 1 b[sym]['count_buy'] += filled elif filled < 0: # Sell b[sym]['cnt_sel_evnts'] += 1 b[sym]['count_sell'] += abs(filled) # Remove from the list, accounting done if sym in b['orders']['syms']: # There's a scenario in multiple buys del b['orders']['syms'][sym] # where this key could be gone. del b['orders']['open'][id] # Overall keep track of lowest cash point cash_now = context.portfolio.cash if cash_now < b['cash_low']: b['cash_low'] = cash_now # An alert for negative cash unless you like "leverage" if b['cash_low'] < 0: # Lowest cash points reached ... log.info(str(sym).ljust(5) \ + ' order for ' + (('$' + "%.0f" % transaction) \
+ ',').ljust(8) + ' cash low: ' + str(int(b['cash_low']))
)

# And per symbol
if b[sym]['balance'] < b[sym]['cash_low']:
b[sym]['cash_low'] = b[sym]['balance']

'''
Show summary if this is the last bar
'''
last_bar_now = 0

if not b['summary_print']:
if context.books['arena'] == 'live':
# When paper/live print summary every day end of day
last_bar_now = 1
elif context.books['arena'] == 'backtest':
# Flag for summary output if last bar now
bar = get_datetime()
if b['mode'] == 'daily':
last_bar_now = 1
elif b['mode'] == 'minute':
last_bar_now = 1

if last_bar_now or b['summary_print']:
'''
Summary output to the logging window
'''
# Independent copy of context.books using dict() in case summary print
#   is set to happen more than once in a run, due to concats below (+=)
#   although the print any time is deprecated, couldn't find a way
#   to make work with schedule_function.
b    = dict(context.books)
done = {}   # Protect against any listed twice.

# Some overall values by adding individual values
for sec in b['sids_seen']:
if sec in done:
continue
sym    = sec.symbol
shares = b[sym]['shares']
b[sym]['price'] = data[sec].price if 'price' in data[sec] else b[sym]['price']
b['count_sell']    += b[sym]['count_sell']
b['cnt_sel_evnts'] += b[sym]['cnt_sel_evnts']
b['shares']        += shares
b['shares_value']  += (shares * b[sym]['price'])
done[sec] = 1

q__portfolio  = str(int(context.portfolio.portfolio_value))
cash_end      = context.portfolio.cash
init_cash     = b['init_cash']
avg_init_cash = init_cash / len(b['sids_seen'])
cash_low      = b['cash_low']
my_portfolio  = cash_end + b['shares_value']
cash_profit   = cash_end - b['init_cash']
xval          = 'x0'
max_spent     = init_cash - cash_low
drawdown      = max(init_cash, init_cash - cash_low)
cnt_s_evts    = ('  (' + str(b['cnt_sel_evnts']) + ' trades)').rjust(17)
untouchd      = '' if int(cash_low) <= 0 else \
'  (' + str(int(cash_low)) + ' unused)'
neg_cash      = '' if int(cash_low) >= 0 else '                       ' \
+ "%.0f" % cash_low + ' max negative cash'
if drawdown  != 0:               # Pure profit over input used.
xval      = 'x'  + "%.3f" % ((my_portfolio - init_cash) / drawdown)

w1 = 16; w2 = 8  # Widths of columns
# NOTE(review): ``cnt_b_evts`` used in the list below is never assigned in
# the visible code -- presumably the buy-side twin of ``cnt_s_evts``.
outs = [
'  QPortfolio: '.rjust(w1)+('$'+str(q__portfolio)) .rjust(w2), ' Buy Count: '.rjust(w1)+str(b['count_buy']) .rjust(w2)+cnt_b_evts, ' Sell Count: '.rjust(w1)+str(b['count_sell']) .rjust(w2)+cnt_s_evts, ' Shares Now: '.rjust(w1) + str(b['shares']) .rjust(w2), 'Shares Value: '.rjust(w1) + str(int(b['shares_value'])).rjust(w2), ' Cash Now: '.rjust(w1) + str(int(cash_end)) .rjust(w2), ' Cash Profit: '.rjust(w1) + str(int(cash_profit)) .rjust(w2), ' Commissions: '.rjust(w1) + str(int(b['costs_total'])) .rjust(w2), ' Max Spent: '.rjust(w1) + str(int(max_spent)) .rjust(w2)+neg_cash, 'Initial Cash: '.rjust(w1) + str(int(init_cash)) .rjust(w2)+untouchd, ' Portfolio: '.rjust(w1)+('$'+str(int(my_portfolio))) .rjust(w2),
]
out  = '_\r\n'
for o in outs:
out += (o + '\r\n')
out += '        Return:  ' + xval + '   Profit/Drawdown\r\n'

# -------------------------------
# Individual securities detail
# -------------------------------
out_content_collections = []
count_sids  = len(b['sids_seen'])
sec_word    = ' security' if count_sids == 1 else ' securities'
out_content = '_      ' + "%.0f" % int(b['init_cash'] / count_sids) \
+ ' average initial cash, ' + str(count_sids) + sec_word + '\r\n'
lines_out   = 11    # Log in clumps to stay under logging limits.
count_lines = 0
col_widths  = {1: 8, 2: 7, 3: 7, 4: 12, 5: 8, 6: 8, 7: 9, 8: 9, 9: 8, 10: 9}
# NOTE(review): the header-row definitions and the ``for h in ...`` loops that
# bind ``h`` appear to be missing here; only fragments of them remain.
]
'Symbol','Ratio','Hold','Count','Evnts','Strt|Now','Spent','Now','Now', 'Value'
]

cc = 1  # Column count
out_content += h.center(col_widths[cc])
cc += 1
out_content += '~\r\n' # Tilde at the end of line for replace-all in an editor
# later after copy/paste, since new lines are gone at least on Windows.
# Unfortunate to not be able to copy and paste results easily.

count_lines += 1
cc = 1
out_content += h.center(col_widths[cc])
cc += 1
out_content += '~\r\n'
count_lines += 1

for sym in sorted(s.symbol for s in b['sids_seen']):
balance      = b[sym]['balance']
init_price   = b[sym]['init_price']
shares       = b[sym]['shares']
shares_value = shares * b[sym]['price']
xval         = 'x0'
max_spent    = abs(b[sym]['cash_low'])
drawdown     = min( avg_init_cash, abs(b[sym]['cash_low']) )
if drawdown != 0:
portf = balance + shares_value
xval  = 'x' + "%.1f" % ((portf - drawdown) / drawdown)
if xval == 'x-0.0' or xval == 'x0.0':  # Mainly clearing -0.0
xval = 'x0'    # -0.0 would have been something like -0.02
if init_price:
buy_hold = "%.1f" % ((b[sym]['price'] - init_price) / init_price)
# NOTE(review): the row below has fewer entries than the ten header columns
# and two entries start with a dangling '+' -- parts of each entry look lost.
content = [
sym,
xval,
+ str(b[sym]['count_sell']),
+ str(b[sym]['cnt_sel_evnts']),
"%.0f" % init_price + '|' + "%.0f" % b[sym]['price'],
"%.0f" % max_spent,
"%.0f" % balance,
shares,
int(shares_value)
]
cc = 1
for c in content:
out_content += str(c).center(col_widths[cc])
cc += 1
out_content += '~\r\n'
count_lines += 1

# Decide when to tuck a group away for later and
#    start a new group, using modulus (remainder).
if count_lines % lines_out == 0:
out_content_collections.append(out_content)
out_content = '_\r\n'       # Restart a group

if count_lines % lines_out != 0:    # A few remaining lines.
out_content_collections.append(out_content)

log.info(out)        # The top, general overall output first.

# Show the stored groups
for occ in out_content_collections:
log.info(occ)

# Add any other content you want ---------------------------
out_content  = '_\n' # Underscore to a new line for left alignment,
#   '\n' by itself would be ignored/dropped.
# Some variables or whatever you might want to add...
out_content += ''

log.info(out_content)


There was a runtime error.

"Alpha" refers to the intercept component of the CAPM regression ("Beta" being the coefficient of the independent variable in that same formula). That regression basically attempts to explain the change in price of an asset as driven by the change in price of the market (since it is a single variable regression, any change in price not explained by change in the market must be "Alpha" as there are no other variables included to explain it). More here: http://en.wikipedia.org/wiki/Alpha_%28investment%29

Thanks for your replies. Here is an overview of the strategy. I borrowed ideas from several papers, but the basic idea is to use principal component regression to identify whether a stock is above or below the regression line. The next step is to decide whether to go long or short the stock based on whether it is above or below the regression line. In this version, I attempted to use a vol signal and the intercept of the regression to identify which way to trade. Later, I will test these signals on each stock (instead of the complete basket) to see how they perform. This version runs from 2008. I also simplified the code a bit and removed redundant bits.

I picked these ETFs from internet but I think this strategy will work on any correlated basket of stocks. I will try it on commodity ETFs and post my results.

61
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
 Returns 1 Month 3 Month 6 Month 12 Month
 Alpha 1 Month 3 Month 6 Month 12 Month
 Beta 1 Month 3 Month 6 Month 12 Month
 Sharpe 1 Month 3 Month 6 Month 12 Month
 Sortino 1 Month 3 Month 6 Month 12 Month
 Volatility 1 Month 3 Month 6 Month 12 Month
 Max Drawdown 1 Month 3 Month 6 Month 12 Month
import math
import numpy as np
import datetime
import pandas as pd
from pytz import timezone
from sklearn.decomposition import PCA
from zipline.utils import tradingcalendar as calendar
import statsmodels.api as sm
from statsmodels import tsa

class LogReturnSeries:
    """Accumulate 5-bar log returns, one column per price series.

    ``log_returns`` is ``None`` until the first series is added.  Afterwards
    it is a 2-D array with one row per return observation and one column per
    added series.
    """

    def __init__(self, t):
        self.T = t  # kept for callers; not read inside this class
        self.log_returns = None

    def add_prices(self, prices):
        """Append the 5-bar log returns of ``prices`` as a new column.

        NOTE(review): the ``def`` line of this method was missing, which left
        the body below reading an unbound ``prices`` right after
        ``__init__``.  The method name is reconstructed - confirm it against
        the callers.

        Args:
            prices: indexable sequence of prices, oldest first.

        Every series added must have the same length, because the columns
        are joined with ``np.hstack``.
        """
        column = []
        for ii in range(5, len(prices)):
            base = prices[ii - 5]
            # A zero base price would divide by zero; record a flat return.
            lg = 0 if base == 0 else math.log(prices[ii] / base)
            column.append([lg])

        column = np.array(column)
        if self.log_returns is None:
            self.log_returns = column
        else:
            self.log_returns = np.hstack((self.log_returns, column))

    def get_series(self):
        """Return a copy of the accumulated returns, coerced to 2-D."""
        return np.copy(np.matrix(self.log_returns))

class Strategy:
    """Turn PCA-regressed log returns into per-stock trade signals.

    Every column of the accumulated return series is regressed on the
    principal-component scores of the whole basket.  ``calculate`` then
    emits one signal per column: -1, 0 or 1.
    """

    def __init__(self, T, e):
        self.T = T  # no of time periods
        self.K = e  # explained components; handed to PCA unchanged
        self.N = 100  # no of stocks; overwritten by calculate()
        self.log_returns = LogReturnSeries(T)

    def regress(self, R, d):
        """Regress the first ``self.N`` columns of ``R`` on ``d``.

        Args:
            R: 2-D return array, one column per stock.
            d: regressor matrix (the PCA scores in ``calculate``).

        Returns:
            Four parallel lists, one entry per stock: last fitted value,
            F-test p-value, R-squared and the fitted parameter vector.
        """
        fitted_values = []
        p_values = []
        r_squared = []
        params = []
        for ii in range(self.N):
            f, p, r, s = self.regress_by_stock(d, R[:, ii])
            fitted_values.append(f)
            p_values.append(p)
            r_squared.append(r)
            params.append(s)
        return fitted_values, p_values, r_squared, params

    def regress_by_stock(self, Dm, Fm):
        """Fit ``Fm ~ Dm`` by OLS and return (fitted[-1], p, R^2, params)."""
        ols = sm.OLS(Fm, Dm).fit()
        f = ols.fittedvalues[-1]
        p = ols.f_pvalue
        # ``r`` was returned without ever being assigned (NameError); the
        # caller stores it in ``r_squared`` and thresholds it at 0.8.
        r = ols.rsquared
        s = ols.params
        return f, p, r, s

    def calculate(self, N):
        """Compute one trade signal per stock.

        Args:
            N: number of stock columns to evaluate.

        Returns:
            A list of ``N`` ints in {-1, 0, 1}, or ``None`` when the returns
            or the PCA scores contain NaN/inf.
        """
        self.N = N
        ret = self.log_returns.get_series()

        # Validate the input before PCA sees it: checking only afterwards
        # lets the decomposition run (or fail) on bad data first.
        if np.isnan(ret).any() or np.isinf(ret).any():
            return None

        pca = PCA(self.K)
        d = pca.fit_transform(ret)
        if np.isnan(d).any() or np.isinf(d).any():
            return None

        f, pv, rs, pm = self.regress(ret, d)

        # The volatility regime depends only on the component scores, so it
        # is the same for every stock and is computed once.
        c = d.sum(axis=1)
        lv = np.std(c[-30:])  # long term vol
        sv = np.std(c[-10:])  # short term vol

        signal = []  # buy sell signal by stock
        for ii in range(N):
            if rs[ii] < 0.8 or pv[ii] > 0.01:  # check for regression goodness of fit
                signal.append(0)
                continue

            r = ret[-1, ii]
            off_line = r > f[ii] or r < f[ii]

            # NOTE(review): sm.OLS adds no constant by itself, so pm[ii] is
            # the whole parameter vector rather than an intercept, and
            # ``alpha < 0`` is only unambiguous when it holds one value -
            # confirm the intended quantity.
            alpha = pm[ii]

            if not off_line:
                signal.append(0)
            elif sv > lv:
                # Short-term vol above long-term: sell when alpha is
                # negative, on either side of the regression line.
                signal.append(-1 if alpha < 0 else 0)
            elif sv < lv:
                # Short-term vol below long-term: buy whenever the return is
                # off the line; alpha never changed this outcome.
                signal.append(1)
            else:
                signal.append(0)
        return signal

def initialize(context):
    """Set up the ETF basket, the benchmark and the daily schedule.

    Symbol lookup is pinned to 2014-12-01.  The basket is stored on
    ``context.symbols``, sid 27101 becomes the benchmark, and ``myfunc`` is
    scheduled every day with a market-open offset of 0 hours 5 minutes.
    """
    set_symbol_lookup_date('2014-12-01')

    context.symbols = symbols(
        'EWT', 'AAXJ', 'VPL', 'EPP', 'EWA', 'EWS', 'GMF', 'IPAC',
        'ENZL', 'DVYA', 'AUSE', 'AXJL', 'PAF', 'FHK', 'FPA', 'FTW',
        'EWSS', 'DXKW', 'QAUS', 'DBAP', 'KROO', 'QTWN', 'AXJS', 'AXJV',
        'HKOR', 'DBKO', 'GMFS', 'FKO', 'QKOR', 'EWAS', 'FAUS')

    context.BENCHMARK = sid(27101)
    set_benchmark(context.BENCHMARK)

    open_plus_five = time_rules.market_open(hours=0, minutes=5)
    schedule_function(func=myfunc,
                      date_rule=date_rules.every_day(),
                      time_rule=open_plus_five)

def handle_data(context, data):
    """Do nothing per bar; this hook is intentionally a no-op."""
    return None

def myfunc(context, data):
    """Scheduled rebalance: compute signals and move each asset to target.

    Assets whose 51-bar daily close history contains NaN are skipped.  The
    rest get a signal each: 1 targets +60000 of value, -1 targets -60000,
    and any other mismatch between signal and holding is flattened.
    """
    context.counter = 0
    pricehistory = history(bar_count=51, frequency='1d', field='close_price')
    strategy = Strategy(50, 0.999999999)

    # NOTE(review): pricehistory is only used for the NaN screen below;
    # nothing visible here hands the prices to ``strategy`` before
    # ``calculate`` runs - confirm whether a feeding call was lost.
    ignorelist = []
    usable = 0
    for asset in data:
        if np.isnan(pricehistory[asset].values).any():
            ignorelist.append(asset)
        else:
            usable += 1

    signal = strategy.calculate(usable)
    if signal is None:
        return

    # Signals are ordered like the non-ignored assets, so walk both together.
    sidx = 0
    for asset in data:
        if asset in ignorelist:
            continue

        want = signal[sidx]
        held = context.portfolio.positions[asset].amount

        if want == 1 and held <= 0:
            order_target_value(asset, 60000)
        elif want == -1 and held >= 0:
            order_target_value(asset, -60000)
        elif (held > 0 and want < 1) or (held < 0 and want > -1):
            # Holding no longer matches the signal: close it out.
            order_target(asset, 0)

        sidx += 1

    record(p=context.portfolio.capital_used)
There was a runtime error.

@Matt Impressive.

Wikipedia regarding Alpha includes: $R_{i, t} - R_f = \alpha_i + \beta_i (R_{M, t} - R_f) + \epsilon_{i, t}$. At Microsoft we were asked to write bug reports so a five-year-old can understand, and keep in mind we're talkin' brilliant developers, lol.

Wonder if anyone has a description of Alpha that a five-year-old can understand. Could maybe start with whether higher or lower is better. :) Lower Beta is better for example. Higher Sharpe is sharp. In the Managers Fund those two are valued.

That's a fine looking result Pavy, and what do you think of the Alpha at .17?

In practical terms a strategy with high alpha is a strategy that "beats the market" (meaning it could be up even if the market is down, but also up when the market is up).

The Capital Asset Pricing Model defines the concept of the security characteristic line. This is an important concept in finance.

I assume that you are familiar with the concept of a least-squares regression. The equation you included above is that line. Here is a definition of the meaning of each symbol:

R_{i, t} is the return of an asset i at period t.

R_f is the risk-free rate. This is the return that one could expect on an investment that posed no risk of default.

Since the US government has never defaulted on its obligations, the Treasury rate for a specific maturity (more accurately, the rate of a zero-coupon bond with a specific duration) is sometimes used as a proxy for the risk free rate. However, because of tax incentives, some feel that demand for Treasuries is artificially high and the rates are thus artificially low. Thus, other rates are used. For example, the LIBOR swap curve is used to obtain proxies for the risk-free rate.

\epsilon_{i, t} is a random shock, representing the error. It is a standard element in all regression models, so I won't go into too much detail. The important things to know are that it has an expectation of zero and is specific to both the security and the time.

R_{M, t} is the expected return for the entire market at time t. Clearly, this depends on how you define the market. The S&P 500 is often used, but you could also use a different benchmark depending on the application.

(R_{M, t} - R_f) represents the risk premium to investing in the market. A standard concept in finance is the idea that investors prefer less risk to more risk, and to compensate them for taking higher risk, they must be paid a premium in expected return. This is the value of that premium.

(R_{i, t} - R_f) is the premium for the specific security i. That is what the regression is trying to estimate.

OK, now on to the interesting parts of the equation: the regression coefficients, alpha and beta.

Beta is the sensitivity of the return of the security in question to the return of the market (the slope of the regression line; unlike a correlation, it is not bounded by 1). An S&P 500 index fund has a beta of 1. A portfolio with half its money invested in cash and half invested in the S&P index fund has a beta of 0.5. A leveraged portfolio that has a leverage ratio of 2 and has invested in the S&P index fund has a beta of 2. A portfolio that has sold short the S&P index fund has a beta of -1.

A higher or lower beta is neither good nor bad. It is a selection that should be made based on both how much risk the investor is willing to accept and their opinion on the market. Someone who is bearish may choose a portfolio with beta near 0 or even negative. A bull will choose a portfolio with positive beta. If he is willing to tolerate more risk (in exchange for a higher expected return), he will choose a higher beta portfolio. Since the market is always expected to have a positive return, the bear is missing out on returns by staying on the sideline. If he is smarter than the market, however, this choice may pay off for him.

Alpha is the intercept of the regression. It represents the return of the portfolio that is uncorrelated with the market. Every active investment manager and active investor is looking to deliver positive alpha. Here is the reason why:

Say I have a choice between two mutual funds: a passive fund that mirrors the S&P 500, and an actively managed fund that invests in the S&P 500.

The passive fund will likely have a very low expense ratio, say 0.05%. The active fund will have a higher expense ratio, say 1.00%. Since the passive fund exactly mirrors the S&P 500, it should have a beta of 1.0 and its random shock should be always 0 (since everyone understands how the fund works, its price should track the S&P 500 exactly). However, I am paying the 0.05% fee regardless of how the market performs, so my alpha is -0.05%.

Now let's assume that the actively managed fund also has a beta of 1.0. The beta for individual stocks is calculated by performing a regression on the historical returns of the stock and the historical returns of the market. The beta of a portfolio is the average beta of the stocks in the portfolio, weighted by investment. So from here, it should be clear that it is easy to calculate the beta for a portfolio.

Because the fund has a large number of stocks, the variance of epsilon for the active fund should be very low. This is the benefit of "diversification": because each epsilon is independent and has an expectation of 0, by the law of large numbers/CLT, the sum of these random variables should have an expectation of 0 with low variance.

Now to calculate the alpha of the active portfolio. This is the hardest part, and if it could be reliably done, it would make some hedge funds go extinct overnight and others very busy. Regardless, we know that the alpha is reduced by .01 because of the fee that we pay our manager. We can only hope that the talent of the manager in selecting stocks will increase the alpha as well.

If the alpha is above 0 (say, 5%), we want to invest in this fund. For the same risk level as the passive fund, we expect a higher return. This is the goal of all investing: more return, same risk (or another way, same return, less risk).

In conclusion, alpha is what every investor is looking for. In an efficient market (a major topic itself), alpha should be 0. Alpha is free money, so it both makes sense why people want it and why it's hard to find.