Back to Community
PCA statistical arbitrage with 'Log Volume' as one of the factors

I tried a variation to the conventional PCA stat arb with log of daily volume as one of the other factors for calculating the residuals. While the back tested results were fine for the in sample data, my out of sample algorithm did not fire any trade. The notebook and results are attached for your reference. If someone can point out anything missing in the algo, it will be of great help :)

Clone Algorithm
1
Loading...
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
"""
This is a template algorithm on Quantopian for you to adapt and fill in.
"""
import quantopian.algorithm as algo
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.filters import QTradableStocksUS
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
import statsmodels.api as sm


def initialize(context):
    """
    Called once at the start of the algorithm.
    """   
   
    context.stock1 = symbol('TXN')
    context.stock2 = symbol('GOOG_L')
    context.stocks = [context.stock1, context.stock2]
    set_slippage(slippage.FixedSlippage(spread=0.00))
    set_commission(commission.PerShare(cost=0.0, min_trade_cost=0.0))
    context.lookback = 30
    context.leverage = 1.0
    context.n_components = 1
    context.entry = 2.0
    context.exit = 0
    schedule_function(  
    handle_data,
    algo.date_rules.every_day(),  
    algo.time_rules.market_open(minutes=5),  
)
    
def handle_data(context, data):
    record(leverage=context.account.leverage,
           exposure=context.account.net_leverage) 
    observations_price = data.history(context.stocks,'price',context.lookback, '1d').dropna(axis=1)
    observations_volume = data.history(context.stocks,'volume',context.lookback, '1d').dropna(axis=1)
    try:
        inpos = np.zeros(len(context.stocks))
        models = get_PCA_regressions(observations_price, observations_volume,n_factors=context.n_components)
        resids = pd.DataFrame({sym: models[sym].resid for sym in models})
        zscores = Zscore(resids).iloc[-1]
        for inst in range(len(context.stocks)):
            if zscores.abs()>context.entry and inpos[inst]==0:
                order_target_percent(inst,np.sign(zscores)*1)
                inpos[inst]=zscores
            elif zscores*np.sign(inpos[inst])<context.exit and inpos[inst]:
                order_target(inst,0)
                inpos[inst] = 0        
       #weights = zscores[zscores.abs() > context.min_z]
        #weights *= (context.leverage / weights.abs().sum())
        #for stock in data:
            #if stock in weights.index:
               # order_target_percent(stock, -weights[stock])
            #else:
               # order_target(stock, 0)
    except Exception as e:
        log.debug(e)

   
#def trade(context, data):                      
    
def get_PCA_regressions(data, ext_fact, n_factors=1):
    pca = PCA()
    model = pca.fit(np.log(data))
    factors = data.dot(model.components_.T)[range(n_factors)]
    factors = sm.add_constant(factors)
    models = {sym: sm.OLS(data[sym], pd.DataFrame(np.array([factors,np.log(ext_factors[sym])]))).fit()
              for sym in data.columns}
    return models
    
        
def Zscore(X):
    return (X - X.mean()) / X.std()
There was a runtime error.