'''
Alpha, selected for low alpha volatility
by Viridian Hawk
modified, blue seahawk ...
Tweaking is considered overfitting yet it can be educational at times.
Some surprising differences:
window_length = 120 # 9.94
window_length = 126 # 11.74 half year vs 120
m &= alphaStd.bottom( c.num_stocks ) # 15.60
# These two together
m &= alphaStd.percentile_between( 5, 80, mask=m) # 27.51 huh?
m &= alphaStd.bottom( c.num_stocks )
Returns in comments are to 1-5-2012 only
'''
import math
import numpy as np
import pandas as pd
import quantopian.algorithm as algo
import quantopian.optimize as opt
from quantopian.pipeline import Pipeline, CustomFactor
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.experimental import risk_loading_pipeline
from quantopian.pipeline.factors import SimpleBeta
from quantopian.pipeline.filters import QTradableStocksUS, StaticAssets
class Alpha(CustomFactor):
    """Per-asset regression alpha: OLS intercept of each asset's daily returns
    regressed against SPY's daily returns over the factor window."""
    inputs = [USEquityPricing.close]
    def compute(self, today, assets, out, close):
        # `close` is a (window_length, n_assets) array of closing prices.
        close = nanfill(close)  # forward-fill NaNs (see nanfill helper below)
        # pct_change leaves an all-NaN first row; [1:] drops it.
        returns = pd.DataFrame(close, columns=assets).pct_change()[1:]
        # NOTE(review): assumes SPY is present in `assets` — confirm the mask
        # passed by make_pipeline never excludes SPY, else this raises KeyError.
        spy_returns = returns[symbol('SPY')]
        # get beta and alpha by running linear regression
        # Design matrix: [spy_returns, 1] so the solution is (slope, intercept).
        A = np.vstack([spy_returns, np.ones(len(spy_returns))]).T
        # lstsq(...)[0] is the (2, n_assets) coefficient matrix; unpacking its
        # rows gives slopes `m` (betas, unused) and intercepts `p` (alphas).
        m, p = np.linalg.lstsq(A, returns)[0]
        out[:] = p  # alpha (intercept) per asset
class Volatility(CustomFactor):
    """Annualized standard deviation of the day-over-day percent change of
    whatever factor/series is supplied via `inputs`."""
    window_safe = True

    def compute(self, today, assets, out, returns):
        series = nanfill(returns)
        # Day-over-day percent change: (x[t+1] - x[t]) / x[t].
        # Slicing off the last row keeps the divisor aligned with the diffs.
        pct_chg = (series[1:] - series[:-1]) / series[:-1]
        # Annualize the daily standard deviation with sqrt(252 trading days).
        out[:] = pct_chg.std(axis=0) * math.sqrt(252)
def initialize(context):
    """One-time setup: universe size, frictionless fills, monthly rebalance,
    and the two pipelines (alpha and risk loadings)."""
    # Portfolio breadth. Earlier trials (returns to 1-5-2012):
    #   200 -> 11.74   100 -> 13.89   80 -> 11.08
    #   140 -> 14.61   125 -> 15.30
    # Only the last assignment ever took effect, so the dead stores are
    # collapsed into this comment.
    context.num_stocks = 120  # 15.37
    # Idealized (frictionless) fills for factor research.
    set_slippage(slippage.FixedSlippage(spread=0))
    set_commission(commission.PerTrade(cost=0))
    # Scheduling trials, returns to 1-5-2012 (original: 9.06):
    #   every_day / market_open              9.71
    #   every_day / market_open(hours=1)     9.60
    #   every_day / market_close(hours=1)    8.41
    #   every_day / market_close(minutes=25) 8.28
    schedule_function(trade, date_rules.month_start(), time_rules.market_open(hours=1))  # 9.94
    algo.attach_pipeline(make_pipeline(context), 'pipeline')
    algo.attach_pipeline(risk_loading_pipeline(), 'risk_loading_pipeline')
def make_pipeline(c):
    """Build the pipeline: SPY beta, regression alpha, and the volatility of
    the alpha z-score; screen down to the `c.num_stocks` least-volatile names.

    c: the algorithm context (reads c.num_stocks).
    """
    # Here, regression_length=window_length. Earlier trials (returns to 1-5-2012):
    #   252 -> 6.35   120 -> 9.94   90 -> 8.89   60 -> 4.03
    # Only the final assignment took effect; dead stores collapsed into this comment.
    window_length = 126  # 11.74 half year vs 120
    # A SimpleBeta without .demean() was also built here (15.37) but was
    # immediately overwritten — removed as a dead store.
    # demean() on beta: "a rather illogical test. whatever works."
    beta = SimpleBeta(target=symbol('SPY'), regression_length=window_length).demean()  # 15.60
    m = beta.notnull()
    alpha = Alpha(window_length=window_length, mask=m)  # 15.60
    #alpha = Alpha(window_length=window_length, mask=m).demean() # 15.60
    m &= alpha.notnull()
    # Moving this above the Alpha() construction errors inside Alpha()
    # (SPY can fall out of the masked universe), so it stays here.
    m &= QTradableStocksUS()  #| StaticAssets(symbol('SPY'))
    alphaStd = Volatility(inputs=[alpha.zscore(mask=m)], window_length=window_length, mask=m)  # 15.60
    #alphaStd = Volatility(inputs=[alpha.rank(mask=m)], window_length=window_length, mask=m) # 3.88
    #alphaStd = Volatility(inputs=[alpha.zscore(mask=m)], window_length=window_length, mask=m).demean() # 15.60
    m &= alphaStd.notnull()
    # Screen trials (returns to 1-5-2012):
    #   bottom(num_stocks) alone          15.60
    #   top(num_stocks)                    2.71
    #   percentile_between(10, 80)        12.88
    #   percentile_between( 4, 80)        24.95
    #   percentile_between( 6, 80)        21.13
    # These two together:
    m &= alphaStd.percentile_between(5, 80, mask=m)  # 27.51 huh?
    m &= alphaStd.bottom(c.num_stocks)
    return Pipeline(
        columns={
            'alpha':    alpha.zscore(mask=m).demean(),
            'beta':     beta,
            'alphaStd': alphaStd,
        },
        screen=m,
    )
def before_trading_start(context, data):
    """Refresh pipeline outputs, chart position counts and leverage, and dump
    the pipeline content to the log a single time."""
    context.output = algo.pipeline_output('pipeline').dropna()
    context.risk_loading_pipeline = algo.pipeline_output('risk_loading_pipeline').dropna()
    # Tally current long/short position counts for the custom chart.
    amounts = [context.portfolio.positions[s].amount for s in context.portfolio.positions]
    num_longs = sum(1 for amt in amounts if amt > 0)
    num_shorts = sum(1 for amt in amounts if amt < 0)
    record(longs=num_longs)
    record(shorts=num_shorts)
    record(l=context.account.leverage)
    # log_data sets context.log_data_done, so this runs only once.
    if 'log_data_done' not in context:
        log_data(context, data, context.output, 4)
def trade(context, data):
    """Rebalance: maximize the pipeline's alpha subject to gross/net exposure,
    equal-weight concentration, and a ~zero portfolio beta."""
    alpha_series = context.output.alpha
    # alpha is already demeaned in the pipeline, so no centering needed here.
    bound = 1.0 / len(alpha_series)  # equal per-position weight limit
    constraints = [
        opt.MaxGrossExposure(1.00),
        opt.NetExposure(-0.05, 0.05),
        opt.PositionConcentration.with_equal_bounds(-bound, bound),
        # Pin exposure to the pipeline's beta column at ~0 (dollar-neutral beta).
        opt.FactorExposure(
            context.output[['beta']],
            min_exposures={'beta': -0.00},
            max_exposures={'beta': 0.00},
        ),
        #opt.experimental.RiskModelExposure(
        #    risk_model_loadings=context.risk_loading_pipeline,
        #    version=opt.Newest),
        ##opt.MaxTurnover( 0.25 if context.account.leverage > 0.85 and context.account.leverage <= 1.07 else 1.5 )
    ]
    algo.order_optimal_portfolio(
        objective=opt.MaximizeAlpha(alpha_series),
        constraints=constraints,
    )
def nanfill(_in):
    """Forward-fill NaNs along axis 1, in place, and return the same array.

    Each NaN at (row, col) is replaced by the nearest non-NaN value to its
    left in the same row; NaNs with nothing to their left stay NaN.
    NOTE(review): pipeline inputs are (window, assets), so axis 1 runs across
    assets here — confirm filling across columns (not down time) is intended.
    Recipe from https://stackoverflow.com/questions/41190852/most-efficient-way-to-forward-fill-nan-values-in-numpy-array
    Includes a way to count nans on webpage at
    https://www.quantopian.com/posts/forward-filling-nans-in-pipeline
    """
    nan_locations = np.isnan(_in)
    # For every cell, the column index of the most recent non-NaN at or
    # before it within its row (0 where no such value exists yet).
    col_ids = np.where(~nan_locations, np.arange(nan_locations.shape[1]), 0)
    np.maximum.accumulate(col_ids, axis=1, out=col_ids)
    nan_rows = np.nonzero(nan_locations)[0]
    _in[nan_locations] = _in[nan_rows, col_ids[nan_locations]]
    return _in
def log_data(context, data, z, num, fields=None):
    ''' Log info about pipeline output or, z can be any DataFrame or Series.
    z:      DataFrame or Series to summarize.
    num:    number of high/low rows to show per detailed column.
    fields: optional list of column names to detail; defaults to all columns.
            (Fix: previously this parameter was ignored and passing it raised
            NameError because `details` was only assigned when fields was None.)
    https://www.quantopian.com/posts/overview-of-pipeline-content-easy-to-add-to-your-backtest
    '''
    if 'log_init_done' not in context:  # {:,} magic for adding commas
        log.info('${:,} {} to {}'.format(int(context.portfolio.starting_cash),
            get_environment('start').date(), get_environment('end').date()))
        context.log_init_done = 1  # fix: flag checked above was never set before
        context.log_data_done = 1  # consumed by before_trading_start's once-only guard
    if not len(z):
        log.info('Empty')
        return
    # Options
    log_nan_only = 0          # Only log if nans are present
    show_sectors = 0          # If sectors, do you want to see them or not
    show_sorted_details = 1   # [num] high & low securities sorted, each column
    padmax = 6                # num characters for each field, starting point
    # Series ......
    if 'Series' in str(type(z)):    # is Series, not DataFrame
        nan_count = len(z[z != z])  # NaN != NaN, so this selects only NaNs
        nan_count = 'NaNs {}/{}'.format(nan_count, len(z)) if nan_count else ''
        if (log_nan_only and nan_count) or not log_nan_only:
            pad = max(padmax, len('%.5f' % z.max()))
            log.info('{}{}{} Series len {}'.format('min'.rjust(pad+5),
                'mean'.rjust(pad+5), 'max'.rjust(pad+5), len(z)))
            log.info('{}{}{} {}'.format(
                ('%.5f' % z.min()) .rjust(pad+5),
                ('%.5f' % z.mean()).rjust(pad+5),
                ('%.5f' % z.max()) .rjust(pad+5),
                nan_count
            ))
            log.info('High\n{}'.format(z.sort_values(ascending=False).head(num)))
            log.info('Low\n{}' .format(z.sort_values(ascending=False).tail(num)))
        return
    # DataFrame ......
    content_min_max = [['', 'min', 'mid', 'max', '']]
    content = ''
    for col in z.columns:
        if col == 'sector' and not show_sectors: continue
        nan_count = len(z[col][z[col] != z[col]])
        nan_count = 'NaNs {}/{}'.format(nan_count, len(z)) if nan_count else ''
        # known bug, not always sorting strings alphabetically ...
        srt = z[col].sort_values() if type(z[col][0]) != str else z.iloc[z[col].str.lower().argsort()]
        padmax = max(padmax, len(str(srt[-1])))
        content_min_max.append([col, str(srt[0]), str(srt[len(srt)//2]), str(srt[-1]), nan_count])
    if log_nan_only and nan_count or not log_nan_only:
        if len(z.columns) == 1: content = 'Stocks: {}'.format(z.shape[0])
        if len(z.columns) > 1: content = 'Stocks: {} Columns: {}'.format(z.shape[0], z.shape[1])
        if len(z.columns):
            paddings = [6 for i in range(4)]
            for lst in content_min_max:  # set max lengths
                for i, val in enumerate(lst[:4]):  # value in each sub-list
                    paddings[i] = max(paddings[i], len(str(val)))
            headr = content_min_max[0]
            content += ('\n{}{}{}{}{}'.format(
                headr[0] .rjust(paddings[0]),
                (headr[1]).rjust(paddings[1]+5),
                (headr[2]).rjust(paddings[2]+5),
                (headr[3]).rjust(paddings[3]+5),
                ''
            ))
            for lst in content_min_max[1:]:  # populate content using max lengths
                content += ('\n{}{}{}{} {}'.format(
                    lst[0].rjust(paddings[0]),
                    lst[1].rjust(paddings[1]+5),
                    lst[2].rjust(paddings[2]+5),
                    lst[3].rjust(paddings[3]+5),
                    lst[4],
                ))
        log.info(content)
    if not show_sorted_details: return
    if len(z.columns) == 1: return  # skip detail if only 1 column
    details = z.columns if fields is None else fields  # fix: honor the fields argument
    for detail in details:
        if detail == 'sector' and not show_sectors: continue
        hi = z[details].sort_values(by=detail, ascending=False).head(num)
        lo = z[details].sort_values(by=detail, ascending=False).tail(num)
        content = ''
        content += ('_ _ _ {} _ _ _' .format(detail))
        content += ('\n\t... {} highs\n{}'.format(detail, str(hi)))
        content += ('\n\t... {} lows \n{}'.format(detail, str(lo)))
        if log_nan_only and not len(lo[lo[detail] != lo[detail]]):
            continue  # skip if no nans
        log.info(content)