'''
For comparison, produces a table for each security of:
o Pandas Beta calculation.
o RollingLinearRegressionOfReturns "beta" value.
o statsmodels.api slope calculation (slope of regression, i.e. trend line)
RollingLinearRegressionOfReturns output does not match statsmodels.api slope nor Pandas Beta
except when returns_length = 2.
Every source on the web says Regression "beta" just means slope of the trend line.
Stocks Beta means volatility compared to some other volatility.
Regression beta means simply slope.
Regression is just a fancy word for trend line coined by Charles Darwin's cousin in the Victorian era.
Here, modifying returns_length and regression_length, you can find instances of positive pandas values and negative RollingLinearRegressionOfReturns values and vice versa.
RollingLinearRegressionOfReturns() claims to produce volatility Beta values with any returns_length and I haven't been able to find a correlation with anything except 2.
'''
from quantopian.algorithm import attach_pipeline, pipeline_output
from quantopian.pipeline import Pipeline, CustomFilter
from quantopian.pipeline.factors import AverageDollarVolume, RollingLinearRegressionOfReturns
import statsmodels.api as sm
import numpy as np
def initialize(context):
    '''Set the comparison parameters, schedule the daily log, and attach the pipeline.

    Stored on ``context`` for the other functions in this file:
      target_sec        -- security every regression is run against (SPY)
      num               -- how many of the highest and of the lowest slope
                           values the pipeline screen lets through
      regression_length -- regression window passed to the pipeline factor
      returns_length    -- returns window passed to the pipeline factor

    The pipeline is attached under the name 'zoo' and exposes a single
    column, 'slope', holding the factor's ``beta`` output.
    '''
    context.target_sec = sid(8554)  # SPY
    context.num = 2  # Number each of high and low to return
    context.regression_length = 30
    context.returns_length = 3

    # Single-argument parenthesized form: prints the same text under
    # Python 2 (print statement) and Python 3 (print function).
    print(".\n\n\tChange c.returns_length to 2 if you want the numbers to match\n.")

    schedule_function(calcs, date_rules.every_day(), time_rules.market_open())

    # Modify to inject any sids that have presumably known Beta from some other source ...
    sids = SidInList(
        sid_list=(
            context.target_sec,
            sid(33431),
            sid(30877),
            sid(49072),
            sid(1374),
        ),
    )
    dv = AverageDollarVolume(window_length=10).percentile_between(94, 95)

    # The regression is only computed for the dollar-volume slice plus the
    # hand-picked sids above.
    regression = RollingLinearRegressionOfReturns(
        mask=(dv | sids),
        target=context.target_sec,
        returns_length=context.returns_length,
        regression_length=context.regression_length,
    )

    pipe = Pipeline()

    # Add these columns if you wish ...
    #   yval = regression.alpha ; pipe.add(yval, 'yval')
    #   corr = regression.r_value ; pipe.add(corr, 'corr')
    #   pval = regression.p_value ; pipe.add(pval, 'pval')
    #   stder = regression.stderr ; pipe.add(stder, 'stder')

    # This is what statisticians mean by regression "beta", i.e. slope of trend line.
    # Named beta_factor (not slope) so it does not shadow the module-level
    # slope() helper.
    beta_factor = regression.beta
    pipe.add(beta_factor, 'slope')

    # Always keep the hand-picked sids, plus the extremes at either end.
    pipe.set_screen(
        sids | beta_factor.top(context.num) | beta_factor.bottom(context.num)
    )
    attach_pipeline(pipe, 'zoo')
def calcs(context, data):
    '''Log one comparison row per security in ``context.prices``.

    Each row shows, in order:
      * the symbol,
      * a pandas beta: covariance of the security's daily returns with the
        target's daily returns, divided by the variance of the target's
        daily returns,
      * the pipeline 'slope' column from ``context.output``,
      * the statsmodels OLS slope of the raw price series (see ``slope``).

    Reads ``context.prices``, ``context.output`` and ``context.target_sec``;
    ``data`` is unused. Nothing is returned and ``context.prices`` is left
    unmodified.
    '''
    prices = context.prices

    # Use the configured target rather than a hard-coded sid so this stays in
    # step with the pipeline regression. Keeping the benchmark returns in a
    # standalone Series avoids adding a helper column to context.prices that
    # the loop below would then have to skip.
    target_returns = prices[context.target_sec].pct_change()
    target_var = target_returns.var()  # variance of the target's daily returns

    log.info('.')
    log.info(' Pandas Regression OLS ')
    log.info(' Sym Beta "beta" Slope Slope')

    for sec in prices.columns:
        # pandas beta calc
        beta = prices[sec].pct_change().cov(target_returns) / target_var

        # Log all
        log.info('{} {} {} {}'.format(
            sec.symbol.rjust(5),
            ('%.2f' % beta).rjust(10),  # pandas beta
            ('%.2f' % context.output['slope'][sec]).rjust(14),  # regression slope
            ('%.2f' % slope(prices[sec].values)).rjust(14),  # statsmodels slope
        ))
def before_trading_start(context, data):
    '''Refresh the pipeline results and the price history used by ``calcs``.

    Stores the 'zoo' pipeline output on ``context.output`` and a daily
    'price' history for every security in that output on ``context.prices``.
    '''
    pipeline_results = pipeline_output('zoo')
    context.output = pipeline_results

    # pandas beta calculation prep
    # +1 due to the one-off discrepancy (presumably the bar that is lost when
    # prices are turned into returns).
    bar_count = context.regression_length + 1
    context.prices = data.history(
        pipeline_results.index,
        'price',
        bar_count,
        '1d',
    )
def slope(in_list):
    '''Return beta (slope of trend line) portion of OLS regression.

    ``in_list`` is regressed against the integer steps -(n-1) .. 0, where n is
    ``len(in_list)``, with a constant term added. The last fitted parameter
    is returned.
    '''
    n_points = len(in_list)
    steps = range(1 - n_points, 1)       # -(n-1), ..., -1, 0
    design = sm.add_constant(steps)      # constant column plus the steps
    fitted = sm.OLS(in_list, design).fit()
    return fitted.params[-1]
class SidInList(CustomFilter):
    '''Pipeline filter that flags the assets found in ``sid_list``.

    ``sid_list`` is supplied as a filter parameter when the filter is
    constructed.
    '''
    inputs = []
    window_length = 1
    params = ('sid_list',)

    def compute(self, today, assets, out, sid_list):
        # One boolean per asset: True where the asset appears in sid_list.
        is_listed = np.in1d(assets, sid_list)
        out[:] = is_listed
'''
c.output.sort('slope')
DataFrame:
corr pval slope stder yval
Equity(37736 [UCO]) -0.932672 7.420639e-10 -8.918334 0.791325 0.140035
Equity(4664 [SM]) -0.858707 6.303240e-07 -7.274278 0.995943 0.472035
Equity(30877 [CAPR]) -0.100903 6.634221e-01 -0.402654 0.910818 -0.086735
Equity(33431 [ROSG]) 0.469518 3.175997e-02 0.901243 0.388808 -0.068106
Equity(8554 [SPY]) 1.000000 1.308086e-188 1.000000 0.000000 0.000000
Equity(1374 [CDE]) 0.880160 1.438864e-07 10.636107 1.315960 -0.054483
Equity(18522 [ARMH]) 0.840600 1.839885e-06 13.218204 1.954028 0.000017
Pandas Regression OLS FinViz
Sym Beta "beta" Slope Slope Beta
CAPR 0.13 -3.45 -0.00 -2.25
CDE 7.32 21.56 -0.16 1.11
HAIN 7.49 21.37 -0.96 0.92
LABD -10.06 -23.21 0.08 none
NTAP 2.87 -11.83 0.47 1.58
ROSG -2.50 4.43 -0.01 -4.49
SPY 1.00 1.00 -0.02 none
Note: The FinViz lookback window and method are unknown and its values are unverified,
so there is no reason to trust them. Nevertheless, a few are included here
for whatever they might be worth. They applied only to the date this code was originally written,
which is long gone now.
'''