Back to Community
Principle Component Analysis of the Energy Sector

Hi everybody,
I threw together an algo that uses PCA to get the component that explains the most variance in the energy sector. It assumes the principle component is a unit portfolio and buys 500 units of the portfolio.

It is really basic, but could be the baseline for more work. Here are a couple references on the idea.
PCA analysis in an Ipython notebook

Stat Arb in the US Equities Market

This is not a duplication of either, but an overly simplified version of the idea. It uses prices instead of returns, and rebalances on a 5 day schedule. I need to do more work to wrap my head around the math, but it looks like there's a strategy in there somewhere.

David

Clone Algorithm
63
Loading...
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
'''
Principle Component Analysis


http://www.math.nyu.edu/faculty/avellane/AvellanedaLeeStatArb071108.pdf
http://nbviewer.ipython.org/github/carljv/Will_it_Python/blob/master/MLFH/ch8/ch8.ipynb
'''

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from zipline.utils import tradingcalendar as calendar



def initialize(context):
    set_symbol_lookup_date('2005-01-01')
    context.stocks = symbols(
        'XOM',  'CVX',  'SLB',  'COP',  'EOG',  'PXD',  'OXY',  'HAL',  'APC',  
        'WMB',  'APA',  'NOV',  'BHI',  'VLO',  'NBL',  'DVN', 'NFX',
        'COG',  'HES',  'MRO', 'FTI',  'TSO', 'RRC',  'CAM',  
        'CHK',  'SWN',  'CNX',  'EQT',  'MUR',  'RIG',  'OKE',  'NBR',  'DNR',  
        'ESV',  'HP',  'RDC',  'NE',  'BTU',  'DO', 'XLE'
    )
    set_slippage(slippage.FixedSlippage(spread=0.00))
    set_commission(commission.PerShare(cost=0.01, min_trade_cost=1.0))
    context.manager = EventManager(period=5, rule_func=lambda x: True)
    context.leverage = 1.5

def handle_data(context, data):
    P = context.portfolio
    pos = P.positions
    
    market_value = 0
    for i in data:
        market_value += abs(pos[i].amount * data[i].price)
        
    record(leverage=market_value / P.portfolio_value,
           exposure=P.positions_value / P.portfolio_value)
    
    if not context.manager.signal(get_datetime()):
        return
    
    # Drop any stocks with nan values
    prices = history(60, '1d', 'price').T.dropna().T
    prices = zscore(np.log(prices))
    
    pca = PCA(n_components=1).fit(prices.corr())
    
    unit_values = -pd.Series(pca.components_[0], index=prices.columns)
    
    for i in data:
        if i in unit_values.index:
            shares = 500 * unit_values[i]
            order_target(i, shares)
        else:
            order_target(i, 0)


zscore = lambda x: (x - x.mean()) / x.std()


class EventManager(object):
    '''
    Manager for timing the entry point of periodic events.

    '''
    def __init__(self, 
                 period=1,
                 max_daily_hits=1,
                 rule_func=None):
        '''
        :Parameters:
            period: integer <default=1>            
                number of business days between events

            max_daily_hits: integer <default=1>
                upper limit on the number of times per day the event is triggered.
                (trading controls could work for this too in some cases)

            rule_func: function (returns a boolean) <default=None>
                decision function for timimng an intraday entry point
        '''
        self.period = period
        self.max_daily_hits = max_daily_hits
        self.remaining_hits = max_daily_hits
        self._rule_func = rule_func
        self.next_event_date = None
        self.market_open = None
        self.market_close = None
    
    @property
    def todays_index(self):
        dt = calendar.canonicalize_datetime(get_datetime())
        return calendar.trading_days.searchsorted(dt)
    
    def open_and_close(self, dt):
        return calendar.open_and_closes.T[dt]

    def signal(self, *args, **kwargs):
        '''
        Entry point for the rule_func
        All arguments are passed to rule_func
        '''
        now = get_datetime()
        dt = calendar.canonicalize_datetime(now)
        if self.next_event_date is None:
            self.next_event_date = dt
            times = self.open_and_close(dt)
            self.market_open = times['market_open'] 
            self.market_close = times['market_close'] 
        if now < self.market_open:
            return False
        if now == self.market_close:
            self.set_next_event_date()
        decision = self._rule_func(*args, **kwargs)
        if decision:
            self.remaining_hits -= 1
            if self.remaining_hits <= 0:
                self.set_next_event_date()
        return decision
    
    def set_next_event_date(self):
        self.remaining_hits = self.max_daily_hits
        tdays = calendar.trading_days
        idx = self.todays_index + self.period
        self.next_event_date = tdays[idx]
        times = self.open_and_close(self.next_event_date)
        self.market_open = times['market_open']
        self.market_close = times['market_close']            
    
There was a runtime error.
5 responses

The best part of Avellaneda's paper is the s-score. Dr. Liew extended the idea using the Black-Litterman model: http://battleofthequants.net/wp-content/uploads/2013/03/2010-10-12_US_Equity_Mean_Reversion.pdf

Thanks for sharing this algo! An interesting extension would be to build a market-neutral portfolio using the "eigen-portfolios." The first principal component is highly correlated with the market (indeed one could consider the variance of this component as representing the market effect on all firms). Perhaps a more compelling strategy would be to trade based on non-market wide correlations between firms.

Just a quick thought.....Would a simple correlation analysis of the individual stock returns vs. the DJ index returns indicate a similar pattern as your loadings graph ?

I tried so hard. If you enter different amounts you get varying amount of leverage .

I tried so hard. If you enter different amounts you get varying amount of leverage

Clone Algorithm
9
Loading...
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
'''
Principle Component Analysis


http://www.math.nyu.edu/faculty/avellane/AvellanedaLeeStatArb071108.pdf
http://nbviewer.ipython.org/github/carljv/Will_it_Python/blob/master/MLFH/ch8/ch8.ipynb
'''

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from zipline.utils import tradingcalendar as calendar



def initialize(context):
    set_symbol_lookup_date('2005-01-01')
    context.stocks = symbols(
        'XLY',  'XLF',  'XLK',  'XLE',  'XLV',  'XLI',  'XLP',  'XLB',  'XLU',  
        
    )
    set_slippage(slippage.FixedSlippage(spread=0.00))
    set_commission(commission.PerShare(cost=0.01, min_trade_cost=1.0))
    context.manager = EventManager(period=5, rule_func=lambda x: True)
    context.leverage = 1.5

def handle_data(context, data):
    P = context.portfolio
    pos = P.positions
    
    market_value = 0
    for i in data:
        market_value += abs(pos[i].amount * data[i].price)
        
    record(leverage=market_value / P.portfolio_value,
           exposure=P.positions_value / P.portfolio_value)
    
    if not context.manager.signal(get_datetime()):
        return
    
    # Drop any stocks with nan values
    prices = history(60, '1d', 'price').T.dropna().T
    prices = zscore(np.log(prices))
    
    pca = PCA(n_components=1).fit(prices.corr())
    
    unit_values = -pd.Series(pca.components_[0], index=prices.columns)
    
    for i in data:
        if i in unit_values.index:
            shares = 500 * unit_values[i]
            order_target(i, shares)
        else:
            order_target(i, 0)


zscore = lambda x: (x - x.mean()) / x.std()


class EventManager(object):
    '''
    Manager for timing the entry point of periodic events.

    '''
    def __init__(self, 
                 period=1,
                 max_daily_hits=1,
                 rule_func=None):
        '''
        :Parameters:
            period: integer <default=1>            
                number of business days between events

            max_daily_hits: integer <default=1>
                upper limit on the number of times per day the event is triggered.
                (trading controls could work for this too in some cases)

            rule_func: function (returns a boolean) <default=None>
                decision function for timimng an intraday entry point
        '''
        self.period = period
        self.max_daily_hits = max_daily_hits
        self.remaining_hits = max_daily_hits
        self._rule_func = rule_func
        self.next_event_date = None
        self.market_open = None
        self.market_close = None
    
    @property
    def todays_index(self):
        dt = calendar.canonicalize_datetime(get_datetime())
        return calendar.trading_days.searchsorted(dt)
    
    def open_and_close(self, dt):
        return calendar.open_and_closes.T[dt]

    def signal(self, *args, **kwargs):
        '''
        Entry point for the rule_func
        All arguments are passed to rule_func
        '''
        now = get_datetime()
        dt = calendar.canonicalize_datetime(now)
        if self.next_event_date is None:
            self.next_event_date = dt
            times = self.open_and_close(dt)
            self.market_open = times['market_open'] 
            self.market_close = times['market_close'] 
        if now < self.market_open:
            return False
        if now == self.market_close:
            self.set_next_event_date()
        decision = self._rule_func(*args, **kwargs)
        if decision:
            self.remaining_hits -= 1
            if self.remaining_hits <= 0:
                self.set_next_event_date()
        return decision
    
    def set_next_event_date(self):
        self.remaining_hits = self.max_daily_hits
        tdays = calendar.trading_days
        idx = self.todays_index + self.period
        self.next_event_date = tdays[idx]
        times = self.open_and_close(self.next_event_date)
        self.market_open = times['market_open']
        self.market_close = times['market_close']            
    
There was a runtime error.