Back to Community
Help for a newbie (mean reversion)

I'm very new to this, and I've written my first mean reversion algo, which calculates a hedge ratio using linear regression between USO and GLD and then buys/sells depending on the deviation of the portfolio price from its mean. It hasn't turned out very well, unsurprisingly, and the results vary wildly depending on what I set as the lookback period. I was wondering if anyone could point out whether I have fundamentally misunderstood how this strategy works. Pointers on my terrible, terrible code would also be appreciated.

Clone Algorithm
4
Loading...
Backtest from to with initial capital
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
import pandas as pd
import numpy as np
from pandas.stats.api import ols

def initialize(context):
    """Configure the GLD/USO pair, the rolling window and the daily schedule."""
    # Rolling-window length in days for the mean/std of the portfolio
    # price (and for the regression); the value is fairly arbitrary.
    context.lookback = 10

    # Running list of unit-portfolio prices, appended to by my_rebalance.
    context.yPort = []

    # The only instruments traded are the GLD and USO ETFs.
    context.securities = symbols('GLD', 'USO')

    # Rebalance every day, 10 minutes before the market close.
    rebalance_time = time_rules.market_close(minutes=10)
    schedule_function(my_rebalance, date_rules.every_day(), rebalance_time)

    # Record tracking variables at the end of each day.
    record_time = time_rules.market_close()
    schedule_function(my_record_vars, date_rules.every_day(), record_time)

 
def before_trading_start(context, data):
    """Re-estimate the hedge ratio before each session.

    Regresses the last ``context.lookback`` daily closes of USO on those of
    GLD and stores the GLD coefficient in ``context.hr``.
    """
    window = context.lookback

    # Recent daily closes of the two ETFs.
    gld_close = data.history(symbol('GLD'), 'close', window, '1d')
    uso_close = data.history(symbol('USO'), 'close', window, '1d')

    # Combine both series into one frame for the regression.
    closes = pd.DataFrame({'GLD': gld_close, 'USO': uso_close})

    # Linear regression; the coefficient of x (GLD) is the hedge ratio.
    fit = ols(y=closes['USO'], x=closes['GLD'])
    context.hr = fit.beta['x']
    

def my_rebalance(context, data):
    """Trade the GLD/USO spread against its rolling z-score.

    Appends the current unit-portfolio price (USO minus hedge ratio times
    GLD) to ``context.yPort``. Once ``context.lookback + 1`` prices are held,
    the newest price is scored against the mean and standard deviation of the
    ``lookback`` prices before it, and both legs are resized in proportion to
    that z-score.

    No orders are placed (existing positions are left alone) while the
    history is still filling, when the current portfolio price is not a
    finite number, or when the window has no usable dispersion.
    """
    lb = context.lookback

    # Current ETF prices near the end of the day.
    x = data.current(symbol('GLD'), 'price')
    y = data.current(symbol('USO'), 'price')

    # Price of one unit of the portfolio: +1 USO, -hr GLD.
    port = y - context.hr * x

    # A NaN/inf price (or hedge ratio) would contaminate every window that
    # contains it, so skip the day instead of storing it.
    if not np.isfinite(port):
        return

    # Keep only the newest lb + 1 portfolio prices.
    context.yPort.append(port)
    if len(context.yPort) > lb + 1:
        context.yPort = context.yPort[-(lb + 1):]

    # Not enough portfolio prices collected yet.
    if len(context.yPort) < lb + 1:
        return

    # Moving average and std of the lb prices preceding the current one.
    previous = context.yPort[:lb]
    ma = np.mean(previous)
    ms = np.std(previous)

    # With zero dispersion the z-score would be inf/NaN and so would the
    # order sizes below; there is no signal to trade on.
    if not ms > 0:
        return

    # Z-score of the current portfolio price.
    z = (port - ma) / ms

    # Size each leg by the hedge ratio and the z-score, scaled up by an
    # arbitrary factor: long GLD / short USO when the spread is rich.
    order_target_value(symbol('GLD'), 10000 * z * context.hr * x)
    order_target_value(symbol('USO'), -10000 * z * y)
     
def my_record_vars(context, data):
    """Record the account leverage and the current hedge ratio."""
    record(leverage=context.account.leverage, hedgeRatio=context.hr)

There was a runtime error.
7 responses

Some changes I've made, which seem to help reduce the volatility and drawdown:
- order on market open, using yesterday's close data (I guessed more liquidity is better than trying to predict the close)
- compare current day's closing price to moving average of last 10 days including itself (this should improve accuracy)
- grow position size as portfolio grows (instead of fixed position sizing)

Clone Algorithm
6
Loading...
Backtest from to with initial capital
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
import pandas as pd
import numpy as np
from pandas.stats.api import ols

def initialize(context):
    """Configure the GLD/USO pair, the rolling window and the daily schedule."""
    # Rolling-window length in days for the mean/std of the portfolio
    # price (and for the regression); the value is fairly arbitrary.
    context.lookback = 10

    # Running list of unit-portfolio prices, appended to in
    # before_trading_start.
    context.yPort = []

    # The only instruments traded are the GLD and USO ETFs.
    context.securities = symbols('GLD', 'USO')

    # Rebalance every day at the market open.
    rebalance_time = time_rules.market_open()
    schedule_function(my_rebalance, date_rules.every_day(), rebalance_time)

    # Record tracking variables at the end of each day.
    record_time = time_rules.market_close()
    schedule_function(my_record_vars, date_rules.every_day(), record_time)

 
def before_trading_start(context, data):
    """Refresh the hedge ratio and the spread z-score before the open.

    Values written to ``context``:

    * ``hr``    - GLD coefficient from regressing the last ``lookback`` daily
      USO closes on the GLD closes.
    * ``x``, ``y`` - most recent GLD and USO close.
    * ``port``  - unit-portfolio price, ``y - hr * x``.
    * ``yPort`` - list of portfolio prices, cut to the last ``lookback``
      entries once that many exist.
    * ``z``     - z-score of ``port`` within ``yPort``; 0 until the window is
      full, and 0 when the window has no usable dispersion.
    """
    lb = context.lookback

    # Recent daily closes of the two ETFs.
    x = data.history(symbol('GLD'), 'close', lb, '1d')
    y = data.history(symbol('USO'), 'close', lb, '1d')

    # Linear regression to get the hedge ratio (coefficient of x).
    df = pd.DataFrame({'GLD': x, 'USO': y})
    res = ols(y=df['USO'], x=df['GLD'])
    context.hr = res.beta['x']

    # Take the latest closes by position. x and y are pandas Series, and a
    # bare [-1] only works through the label lookup falling back to
    # positions; .iloc states the intent explicitly.
    context.x = x.iloc[-1]
    context.y = y.iloc[-1]

    # Price of one unit of the portfolio: +1 USO, -hr GLD.
    port = context.y - context.hr * context.x
    context.port = port

    # Keep track of portfolio prices.
    context.yPort.append(port)

    # Neutral signal unless a valid z-score can be computed below.
    context.z = 0

    if len(context.yPort) >= lb:
        # Only the previous lookback number of prices is needed.
        context.yPort = context.yPort[-lb:]

        ma = np.mean(context.yPort)
        ms = np.std(context.yPort)

        # Z-score of the current portfolio price. A zero (or NaN) std would
        # turn this into inf/NaN and feed NaN order sizes to my_rebalance,
        # so the neutral 0 is kept in that case.
        if ms > 0:
            context.z = (port - ma) / ms
    

def my_rebalance(context,data):
    """Resize both legs of the spread from the values cached on ``context``.

    Uses the z-score, hedge ratio and prices stored by before_trading_start;
    position size grows with the current portfolio value instead of being a
    fixed amount.
    """
    equity = context.portfolio.portfolio_value

    # Common scale for both legs: portfolio value times z-score over 100.
    scale = equity*context.z/100

    gld_price = context.x
    uso_price = context.y

    # Opposite signs on the two legs, with the GLD leg weighted by the
    # hedge ratio.
    order_target_value(symbol('GLD'), scale*context.hr*gld_price)
    order_target_value(symbol('USO'), -scale*uso_price)
        
     
def my_record_vars(context, data):
    """Record the latest unit-portfolio price."""
    # Leverage and the hedge ratio could be recorded here as well; only the
    # portfolio price is currently tracked.
    unit_price = context.port
    record(port=unit_price)

There was a runtime error.

Also I was wondering if you actually need two lookback periods. One is for establishing the hedge ratio. If it's relatively stable this could be longer than the current 10 days. The other is for establishing direction of mean reversion. This will be related to the half life of mean reversion, and may vary. I believe it's an output of OLS?

Update: the half-life of mean reversion can be worked out from the autocorrelation slope, in other words the beta obtained by regressing the one-period changes of the series on a lagged version of the series itself; the half-life is then -ln(2)/beta. Ernie Chan gives the MATLAB code in his book.

Found this:

http://epchan.blogspot.co.uk/2011/06/when-cointegration-of-pair-breaks-down.html

Read the comments. Ernie is extremely generous with his thoughts!

I noticed the algo was really unstable with respect to the lookback length. Looking at the chart of the hedge ratio, I can see it's very unstable, and yet I would expect the hedge ratio to change only relatively slowly. So, I use exponential smoothing on the hedge ratio. This seems to help stabilise the performance for a wider range of lookback parameters.

Clone Algorithm
9
Loading...
Backtest from to with initial capital
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
import pandas as pd
import numpy as np
from pandas.stats.api import ols

def initialize(context):
    """Configure the GLD/USO pair, the rolling window and the daily schedule."""
    # Rolling-window length in days for the mean/std of the portfolio
    # price (and for the regression); the value is fairly arbitrary.
    context.lookback = 10

    # Starting value of the hedge ratio, which before_trading_start then
    # updates by blending in each new regression estimate.
    context.hr = 0.10

    # Running list of unit-portfolio prices, appended to in
    # before_trading_start.
    context.yPort = []

    # The only instruments traded are the GLD and USO ETFs.
    context.securities = symbols('GLD', 'USO')

    # Rebalance every day at the market open.
    rebalance_time = time_rules.market_open()
    schedule_function(my_rebalance, date_rules.every_day(), rebalance_time)

    # Record tracking variables at the end of each day.
    record_time = time_rules.market_close()
    schedule_function(my_record_vars, date_rules.every_day(), record_time)

 
def before_trading_start(context, data):
    """Refresh the smoothed hedge ratio and the spread z-score before the open.

    Values written to ``context``:

    * ``hr``    - previous hedge ratio blended with the new regression
      estimate (USO closes on GLD closes over ``lookback`` days); the new
      estimate gets weight ``1 / lookback``.
    * ``x``, ``y`` - most recent GLD and USO close.
    * ``port``  - unit-portfolio price, ``y - hr * x``.
    * ``yPort`` - list of portfolio prices, cut to the last ``lookback``
      entries once that many exist.
    * ``z``     - z-score of ``port`` within ``yPort``; 0 until the window is
      full, and 0 when the window has no usable dispersion.
    """
    lb = context.lookback

    # Recent daily closes of the two ETFs.
    x = data.history(symbol('GLD'), 'close', lb, '1d')
    y = data.history(symbol('USO'), 'close', lb, '1d')

    # Linear regression to get the raw hedge ratio (coefficient of x).
    df = pd.DataFrame({'GLD': x, 'USO': y})
    res = ols(y=df['USO'], x=df['GLD'])

    # Smooth the hedge ratio so it doesn't change so rapidly.
    context.hr = context.hr*(1.0-1.0/lb)+res.beta['x']*(1.0/lb)

    # Take the latest closes by position. x and y are pandas Series, and a
    # bare [-1] only works through the label lookup falling back to
    # positions; .iloc states the intent explicitly.
    context.x = x.iloc[-1]
    context.y = y.iloc[-1]

    # Price of one unit of the portfolio: +1 USO, -hr GLD.
    port = context.y - context.hr * context.x
    context.port = port

    # Keep track of portfolio prices.
    context.yPort.append(port)

    # Neutral signal unless a valid z-score can be computed below.
    context.z = 0

    if len(context.yPort) >= lb:
        # Only the previous lookback number of prices is needed.
        context.yPort = context.yPort[-lb:]

        ma = np.mean(context.yPort)
        ms = np.std(context.yPort)

        # Z-score of the current portfolio price. A zero (or NaN) std would
        # turn this into inf/NaN and feed NaN order sizes to my_rebalance,
        # so the neutral 0 is kept in that case.
        if ms > 0:
            context.z = (port - ma) / ms
    

def my_rebalance(context,data):
    """Resize both legs of the spread from the values cached on ``context``.

    Uses the z-score, hedge ratio and prices stored by before_trading_start;
    position size grows with the current portfolio value instead of being a
    fixed amount.
    """
    equity = context.portfolio.portfolio_value

    # Common scale for both legs: portfolio value times z-score over 100.
    scale = equity*context.z/100

    gld_price = context.x
    uso_price = context.y

    # Opposite signs on the two legs, with the GLD leg weighted by the
    # hedge ratio.
    order_target_value(symbol('GLD'), scale*context.hr*gld_price)
    order_target_value(symbol('USO'), -scale*uso_price)
        
     
def my_record_vars(context, data):
    """Record the current (smoothed) hedge ratio."""
    # Leverage and the portfolio price could be recorded here as well; only
    # the hedge ratio is currently tracked.
    hedge_ratio = context.hr
    record(hedgeRatio=hedge_ratio)

There was a runtime error.

That's very helpful, thanks. Yeah, I thought it was strange how much the hedge ratio was fluctuating; I wonder if it is due to having a short lookback period for the linear regression.

This paper looks good. Updates the hedge ratio using a kalman filter.

http://file.scirp.org/pdf/JMF_2016022913315540.pdf