Studies have shown that a large change in trading volume often precedes a price change, reflecting information leakage. One might be able to detect this by finding the correlation between price changes and volume turnover. Here is my attempt to do that.

There are two issues that need to be addressed:

The algo runs slowly because it has to compute a subset correlation column by column. Any suggestions for speeding it up are welcome.

The equity curve trends down monotonically, which suggests that the factor has some alpha. Is there any way to transform the factor so that the equity curve trends up instead?

""" This is a template algorithm on Quantopian for you to adapt and fill in. """ import quantopian.algorithm as algo from quantopian.pipeline import Pipeline,CustomFactor from quantopian.pipeline.data.builtin import USEquityPricing from quantopian.pipeline.filters import QTradableStocksUS,StaticAssets from quantopian.pipeline.factors import AverageDollarVolume, SimpleBeta, Returns, SimpleMovingAverage,Latest from quantopian.pipeline.data import morningstar from quantopian.pipeline.data import Fundamentals import quantopian.optimize as opt import quantopian.pipeline.factors as Factors from quantopian.pipeline.experimental import risk_loading_pipeline from quantopian.pipeline.classifiers.morningstar import Sector from sklearn import preprocessing import math import numpy as np import pandas as pd from scipy import stats def preprocess(a): a = np.nan_to_num(a - np.nanmean(a)) return preprocessing.scale(a) def preprocess2(a): a = a.astype(np.float64) a[np.isinf(a)] = np.nan a = np.nan_to_num(a - np.nanmean(a)) a = winsorize(a, limits=[WIN_LIMIT,WIN_LIMIT]) return preprocessing.scale(a) def leaky(p,q): FRup = -0.42*p[p>0].corr(q[p>0]) FRdn = 1.42*p[p<=0].corr(q[p<=0]) return -(FRup+FRdn) def make_factors(): class Leak(CustomFactor): inputs = [USEquityPricing.close, USEquityPricing.volume, morningstar.valuation.shares_outstanding] window_length = 20 def compute(self, today, assets, out, close, vol, share): n = 8 sft = 1 pr = pd.DataFrame(close, columns=assets).dropna(how='all') hsl = pd.DataFrame(vol/share, columns=assets).dropna(how='all') pr_ret = pr.pct_change(sft) pr_ret21 = pr_ret.rolling(n).mean() hsl_ret = hsl.pct_change(sft) hsl_ret21 = hsl_ret.rolling(n).mean() pr_ret = (pr_ret-pr_ret21).iloc[-n:,:] hsl_ret = (hsl_ret-hsl_ret21).shift(sft).iloc[-n:,:] FR = list(map(lambda x: leaky(pr_ret[x], hsl_ret[x]), assets)) out[:] = preprocess(FR) return { 'Leak' : (Leak,1), } def initialize(context): context.spy = sid(8554) context.std = [] algo.schedule_function( 
allocate, date_rule=algo.date_rules.week_start(), time_rule=algo.time_rules.market_close(minutes=30), ) pipe = make_pipeline(context) algo.attach_pipeline(pipe, 'pipeline') algo.attach_pipeline(risk_loading_pipeline(), 'risk_loading_pipeline') def make_pipeline(context): class Window_Safe(CustomFactor): window_length = 1 window_safe = True def compute(self, today, assets, out, value): out[:] = value pipe = Pipeline() my_etfs = StaticAssets(symbols('SPY')) beta = SimpleBeta(target=symbol('SPY'),regression_length=120, allowed_missing_percentage=1.0 ) universe = QTradableStocksUS() & Sector().notnull() & beta.notnull() dollar_volume = AverageDollarVolume(window_length=5, mask = universe) high_dollar_volume = dollar_volume.top(800) universe = universe & high_dollar_volume mktcap = Factors.MarketCap(mask =universe) factors = make_factors() combined_alpha = None for name, (f,w) in factors.items(): if name == 'CPT': fac = w*f(mask=universe | my_etfs) else: fac = w*f(mask=universe) if combined_alpha == None: combined_alpha = fac else: combined_alpha += fac pipe.add(beta, 'beta') pipe.add(combined_alpha,'combined_alpha') pipe.set_screen((universe & combined_alpha.notnull())|my_etfs) return pipe def before_trading_start(context, data): context.risk_loading_pipeline = algo.pipeline_output('risk_loading_pipeline').dropna() context.output = algo.pipeline_output('pipeline') def allocate(context, data): df = context.output alpha = df.combined_alpha stocks = alpha.index st = [] for i in range(len(stocks)): if data.can_trade(stocks[i]) and not math.isnan(alpha[i]): st.append(stocks[i]) alpha = alpha[st] how_to = {'1': 'alpha', '2':'weights'} how = how_to['1'] # Constraint Parameters MAX_GROSS_LEVERAGE = 1.0 MAX_SHORT_POSITION_SIZE = 0.03 MAX_LONG_POSITION_SIZE = 0.03 MIN_BETA_EXPOSURE = -0.04 MAX_BETA_EXPOSURE = 0.04 SECTOR_EXPOSURE = 0.05 MAX_TURNOVER = 0.95 constraints = [] constrain_gross_leverage = opt.MaxGrossExposure(MAX_GROSS_LEVERAGE) constrain_pos_size = 
opt.PositionConcentration.with_equal_bounds( -MAX_SHORT_POSITION_SIZE, MAX_LONG_POSITION_SIZE, ) beta_neutral = opt.FactorExposure( context.output[['beta']], min_exposures={'beta': MIN_BETA_EXPOSURE}, max_exposures={'beta': MAX_BETA_EXPOSURE}, ) constrain_turnover = opt.MaxTurnover(MAX_TURNOVER) dollar_neutral = opt.DollarNeutral(tolerance=0.0000001) constrain_sector_style_risk = opt.experimental.RiskModelExposure( context.risk_loading_pipeline, version=0,#opt.Newest, min_volatility=-0.3, max_volatility=0.3, min_momentum=-0.3, max_momentum=0.3, min_short_term_reversal=-0.3, max_short_term_reversal=0.3, ) constraints = [ constrain_sector_style_risk, #sector_neutral, dollar_neutral, #constrain_turnover, beta_neutral, constrain_pos_size, constrain_gross_leverage, ] # Run the optimization. This will calculate new portfolio weights and # manage moving our portfolio toward the target. if how == 'alpha': try: algo.order_optimal_portfolio( objective=opt.MaximizeAlpha(alpha), constraints=constraints ) except Exception as e: log.info('error {}'.format(e) ) return else: weights = alpha.fillna(0) weights = weights - weights.mean() weights = weights/weights.abs().sum() weights = opt.calculate_optimal_portfolio( objective=opt.TargetWeights(weights), constraints=[ dollar_neutral, constrain_pos_size,], ) algo.order_optimal_portfolio( objective=opt.TargetWeights(weights), constraints=[dollar_neutral, constrain_pos_size,], ) def handle_data(context, data): """ Called every minute. """ pass