Back to Community
A very crude mean reversion attempt

Here is an attempt to capture mean reverting security prices. My algorithm is very crude at the moment and needs major improvements. Just a proof of concept for the time being.

Clone Algorithm
642
Loading...
Backtest from to with initial capital
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
import math
import numpy as np
import pandas as pd
import scipy as sp
import cvxpy as cvx
from sklearn.covariance import OAS
from sklearn.decomposition import PCA
import statsmodels.api as smapi
from statsmodels.stats.moment_helpers import cov2corr
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.algorithm import attach_pipeline, pipeline_output
from quantopian.pipeline.filters.morningstar import Q500US
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline.factors.morningstar import MarketCap
from quantopian.pipeline.data import morningstar as mstar

class Holder:
    """Bookkeeping record for one open mean-reversion position.

    Snapshots the conditions at entry (z-score, factor loadings,
    regression betas, and the universe columns they were fit on) so the
    hedge can be reconstructed on later days; ``days`` counts how long
    the position has been held.
    """

    def __init__(self, score, loadings, params, instrs):
        self.score, self.loadings = score, loadings
        self.params, self.instrs = params, instrs
        self.days = 0  # holding-period counter, incremented daily
        
def make_pipeline():
    """Build the universe pipeline: Q500US names priced at $15+ within
    the top 150 by market cap."""
    in_q500 = Q500US()
    priced_ok = USEquityPricing.close.latest >= 15
    largest = MarketCap().top(150)
    return Pipeline(screen=in_q500 & priced_ok & largest)
    
def initialize(context):
    """One-time setup: algorithm state, commission model, schedules,
    and the universe pipeline."""
    context.Hedge = sid(8554)   # SPY, excluded from liquidation logic
    context.counter = 250       # primed so day one triggers a universe refresh
    context.scores = {}         # open Holder records keyed by asset

    set_commission(commission.PerShare(cost=0.001, min_trade_cost=0))

    attach_pipeline(make_pipeline(), "Q500")

    # Rebalance shortly after the open; record diagnostics near the close.
    schedule_function(trade,
                      date_rules.every_day(),
                      time_rules.market_open(minutes=10))
    schedule_function(update_chart,
                      date_rules.every_day(),
                      time_rules.market_close(minutes=1))
    
def handle_data(context, data):
    """No per-bar logic; all trading happens via scheduled functions."""

def before_trading_start(context, data):
    """Refresh the trading universe once every 250 sessions.

    The counter is primed to 250 in ``initialize`` so the very first
    session pulls the pipeline output; afterwards it simply ticks up
    until the next refresh is due.
    """
    if context.counter >= 250:
        context.counter = 0
        context.output = pipeline_output("Q500")
        context.indices = context.output.index
    else:
        context.counter += 1

def trade(context, data):
    """Daily rebalance: trade mean reversion of residuals vs. PCA factors.

    Fits a 15-component PCA on daily price differences, regresses price
    levels on (constant + projected factors), and standardizes the
    latest residual into a z-score per asset.  Names with |z| in the
    (1, 3) band are opened (at most 3 concurrent positions), hedged
    against their factor loadings; positions exit after 15 sessions or
    when the residual flips sign.  Final weights are dollar-normalized
    and sent via order_target_percent.

    Fixes vs. the original posting: the Python-2-only ``<>`` operator is
    replaced with ``!=``; tab-indented lines (which raise TabError in
    Python 3) are normalized; the local loop name no longer shadows the
    platform ``sid()`` function; an unreachable else-branch is removed.
    """
    prices = data.history(context.indices, "price", 250, "1d").dropna(axis=1)
    # NOTE(review): the original named this "logP" but no log is taken —
    # these are raw price levels.  Confirm whether log prices were intended.
    P = prices.values
    diff = np.diff(P, axis=0)
    n = diff.shape[1]

    pca = PCA(15, whiten=False)
    # Fit on first differences, then project the price *levels*.
    factors = pca.fit(diff).transform(P)
    loadings = pca.components_.T
    model = smapi.OLS(P, smapi.add_constant(factors)).fit()

    weights = np.zeros((n, n))
    # z-score of today's residual, negated so a rich name (positive
    # residual) receives a short score.
    scores = -model.resid[-1, :] / np.std(model.resid, axis=0)

    for i in range(n):
        asset = prices.columns[i]  # renamed from "sid" to avoid shadowing sid()
        if 1 < abs(scores[i]) < 3 and len(context.scores) < 3:
            # Open a new factor-hedged position: short the loadings,
            # long/short the asset itself by its score.
            for j in range(loadings.shape[1]):
                weights[i, :] -= model.params.T[i, j + 1] * loadings[:, j] * scores[i]
            weights[i, i] += scores[i]
            context.scores[asset] = Holder(scores[i], loadings,
                                           model.params.T[i, 1:],
                                           prices.columns.values)
        elif asset in context.scores:
            # Maintain an existing position using its entry-day hedge,
            # remapping entry-day columns onto today's universe.
            h = context.scores[asset]
            for j in range(h.loadings.shape[1]):
                for k, instr in enumerate(prices.columns):
                    if instr in h.instrs:
                        l = h.instrs.tolist().index(instr)
                        weights[i, k] -= h.params[j] * h.loadings[l, j] * h.score
            weights[i, i] += h.score
            h.days += 1

            # Exit after 15 sessions or when the residual flips sign.
            if h.days > 15 or np.sign(scores[i]) != np.sign(h.score):
                # Instead compute the new score and check if score is back to normal.
                del context.scores[asset]
        # (The original had a final else-branch deleting
        # context.scores[asset]; it was unreachable because the elif
        # already handles every asset present in context.scores.)

    # Net per-asset weight: column sums of the hedge matrix.
    W = weights.sum(axis=0)

    # Flatten anything that dropped out of today's universe (never the hedge).
    for held in context.portfolio.positions:
        if held not in prices.columns and held != context.Hedge:
            order_target(held, 0)
            if held in context.scores:
                del context.scores[held]

    # Convert share-space weights to dollar weights, then normalize.
    weights = np.zeros(len(W))
    for i, asset in enumerate(prices.columns):
        weights[i] = W[i] * data.current(asset, "price")

    denom = np.sum(np.abs(weights)) * (4 - len(context.scores))
    if denom == 0:
        denom = 1.
    weights /= denom

    for i, asset in enumerate(prices.columns):
        order_target_percent(asset, weights[i])
    
def update_chart(context, data):
    """Record daily diagnostics: account leverage and the number of
    long and short positions currently held."""
    record(leverage=context.account.leverage)

    longs = shorts = 0
    # .values() instead of the Python-2-only .itervalues(): iterates
    # identically on both Python 2 and 3.
    for position in context.portfolio.positions.values():
        if position.amount > 0:
            longs += 1
        elif position.amount < 0:
            shorts += 1

    record(l=longs, s=shorts)
There was a runtime error.
3 responses

And here is the corresponding research notebook that shows mean reverting prices after adjusting for PCA factors.

Loading notebook preview...
Notebook previews are currently unavailable.

@Aqua Rooster It's an old post so you might have already figured this out, but the NB is not really showing out-of-sample data. To compute those mean-reverting prices you are regressing the prices against the factors, but the factors are computed using the prices themselves; this is look-ahead bias, unfortunately. Any day plotted in blue (out of sample) should be computed with data coming only from previous days, otherwise it doesn't make sense.

As a proof of what I said, take any time period you like and any configuration of PCA you like, and you always get a mean-reverting output in your NB — but the same settings don't give good results in your algorithm.

I believe the idea behind the algorithm is good and the algorithm is fine too. I only wanted to highlight that the NB needs some changes.

@Aqua Rooster, I am curious about this code:

ret = np.diff(lP, axis=0)  
pca = PCA(50, whiten=False)  
factors = pca.fit(ret[:100, :]).transform(lP)  
model = smapi.OLS(lP, smapi.add_constant(factors)).fit()  

The PCA is computed on price diff (why not returns or log returns?) and then you compute the residual on the price (after PCA transformation). This is interesting, could you elaborate on this?

Also, do you have an empirical rule on how many PCA components to use?

Thanks.