Back to Community
multi-factor example algo

Any suggestions for improving the basic framework?

Clone Algorithm
9
Loading...
Backtest from to with initial capital
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
from quantopian.algorithm import attach_pipeline, pipeline_output, order_optimal_portfolio
from quantopian.pipeline import Pipeline
from quantopian.pipeline.factors import CustomFactor
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.data import Fundamentals
from quantopian.pipeline.factors import Returns,AnnualizedVolatility
import quantopian.optimize as opt
from sklearn import preprocessing
from quantopian.pipeline.experimental import risk_loading_pipeline
from quantopian.pipeline.filters import QTradableStocksUS
from scipy.stats.mstats import winsorize
 
import numpy as np
 
# Portfolio construction settings
MAX_GROSS_EXPOSURE = 1.0  # |longs| + |shorts| as a fraction of capital
NUM_LONG_POSITIONS = 200  # names taken from the top of the combined alpha
NUM_SHORT_POSITIONS = NUM_LONG_POSITIONS  # symmetric short side
MAX_POSITION_SIZE = 0.01  # per-name weight cap passed to PositionConcentration

# Factor preprocessing settings
WIN_LIMIT = 0.005 # factor preprocess winsorize limit
        
def make_factors():
    """Build the factor zoo for the pipeline.

    Returns:
        dict mapping a display name to a CustomFactor subclass.  Every factor
        emits winsorized, z-scored values via preprocess().
    """

    class mean_rev(CustomFactor):
        """Weighted mean-reversion score over lookbacks of 10..window_length days."""
        inputs = [USEquityPricing.open, USEquityPricing.high, USEquityPricing.low, USEquityPricing.close]
        window_length = 30

        def compute(self, today, assets, out, open, high, low, close):
            # Typical price per bar.
            p = (open + high + low + close) / 4

            n_days = p.shape[0]     # rows available (== window_length)
            n_assets = p.shape[1]

            # BUG FIXES vs. the original:
            #  * np.nanmean(p[-k:,:]) lacked axis=0, collapsing each slice
            #    to a single scalar instead of a per-asset mean;
            #  * the accumulator was clobbered each pass (`b = x; b += wt*b`),
            #    so only the final lookback survived;
            #  * the loop bound used the asset count (len(close[0,:])), not
            #    the day count, re-adding identical terms past window_length.

            # "b" leg: mean(price)/last(price), each lookback weighted by
            # its cross-sectional sum.
            b = np.zeros(n_assets)
            w = np.zeros(n_assets)
            for k in range(10, n_days + 1):
                x = np.nanmean(p[-k:, :], axis=0) / p[-1, :]
                wt = np.nansum(x)
                b += wt * x
                w += wt
            b = b / w

            # "a" leg: last(price)/mean(price), same weighting scheme.
            a = np.zeros(n_assets)
            w = np.zeros(n_assets)
            for k in range(10, n_days + 1):
                x = p[-1, :] / np.nanmean(p[-k:, :], axis=0)
                wt = np.nansum(x)
                a += wt * x
                w += wt
            a = a / w

            # Keep only the above-average side of each leg, then score the spread.
            a = preprocess(a)
            a[a < 0] = 0
            b = preprocess(b)
            b[b < 0] = 0

            out[:] = preprocess(b - a)

    class fcf(CustomFactor):
        """Most recent free-cash-flow yield, NaNs coerced to zero."""
        inputs = [Fundamentals.fcf_yield]
        window_length = 1

        def compute(self, today, assets, out, fcf_yield):
            out[:] = preprocess(np.nan_to_num(fcf_yield[-1]))

    class earn_yield(CustomFactor):
        """Most recent earnings yield, NaNs coerced to zero."""
        inputs = [Fundamentals.earning_yield]
        window_length = 1

        def compute(self, today, assets, out, earn_yield):
            out[:] = preprocess(np.nan_to_num(earn_yield[-1]))

    class Volatility(CustomFactor):
        """Negated 5-day sum of the intraday range (high-low)/close; low-vol names score high."""
        inputs = [USEquityPricing.high, USEquityPricing.low, USEquityPricing.close]
        window_length = 5

        def compute(self, today, assets, out, high, low, close):
            p = (high - low) / close
            out[:] = preprocess(-np.nansum(p, axis=0))

    return {
            'MeanRev':              mean_rev,
            'FCF':                  fcf,
            'Earn_Yield':           earn_yield,
            'Volatility':           Volatility,
           }
 
def make_pipeline():
    """Build the daily pipeline: combined alpha over a mid-high-volatility universe.

    Returns:
        Pipeline with one column, 'combined_alpha', screened to the union of
        the top NUM_LONG_POSITIONS and bottom NUM_SHORT_POSITIONS names.
    """
    # Universe: QTradableStocksUS restricted to the 75th-95th annualized
    # volatility percentile.
    universe = (
        AnnualizedVolatility(mask=QTradableStocksUS())
        .percentile_between(75, 95))

    factors = make_factors()

    # Equal-weight sum of all factors.
    # BUG FIX: `combined_alpha == None` invoked the pipeline Factor's
    # overloaded comparison operator instead of testing identity -> `is None`.
    # `.items()` replaces the Python-2-only `iteritems()` (works on both).
    combined_alpha = None
    for name, f in factors.items():
        if combined_alpha is None:
            combined_alpha = f(mask=universe)
        else:
            combined_alpha += f(mask=universe)

    longs = combined_alpha.top(NUM_LONG_POSITIONS)
    shorts = combined_alpha.bottom(NUM_SHORT_POSITIONS)

    long_short_screen = (longs | shorts)

    pipe = Pipeline(columns = {
        'combined_alpha':combined_alpha,
    },
    screen = long_short_screen)
    return pipe
 
def initialize(context):
    """One-time setup: attach the alpha and risk pipelines, schedule daily work."""
    attach_pipeline(make_pipeline(), 'long_short_equity_template')
    attach_pipeline(risk_loading_pipeline(), 'risk_loading_pipeline')

    # Rebalance one hour after the open every trading day.
    schedule_function(rebalance,
                      date_rules.every_day(),
                      time_rules.market_open(minutes=60),
                      half_days=True)

    # Record portfolio bookkeeping at the close.
    schedule_function(recording_statements,
                      date_rules.every_day(),
                      time_rules.market_close(),
                      half_days=True)

    # Uncomment to model frictionless trading:
    # set_commission(commission.PerShare(cost=0, min_trade_cost=0))
    # set_slippage(slippage.FixedSlippage(spread=0))
    
def before_trading_start(context, data):
    """Cache today's pipeline results on the context ahead of the open."""
    alpha_frame = pipeline_output('long_short_equity_template')
    risk_frame = pipeline_output('risk_loading_pipeline')
    context.pipeline_data = alpha_frame
    context.risk_loading_pipeline = risk_frame
 
def recording_statements(context, data):
    """Plot daily position count and account leverage on the backtest chart."""
    record(num_positions=len(context.portfolio.positions),
           leverage=context.account.leverage)
 
def rebalance(context, data):
    """Daily rebalance: maximize the demeaned/normalized alpha under constraints."""
    raw = context.pipeline_data.combined_alpha

    # Demean, then scale so absolute weights sum to one.
    centered = raw - raw.mean()
    alpha = centered / centered.abs().sum()

    objective = opt.MaximizeAlpha(alpha.dropna())

    constraints = [
        # Cap gross exposure at the configured fraction of capital.
        opt.MaxGrossExposure(MAX_GROSS_EXPOSURE),
        # Keep long and short books balanced.
        opt.DollarNeutral(),
        # Symmetric per-name weight cap.
        opt.PositionConcentration.with_equal_bounds(
            min=-MAX_POSITION_SIZE,
            max=MAX_POSITION_SIZE,
        ),
        # Constrain exposure to the Quantopian risk model factors.
        opt.experimental.RiskModelExposure(
            context.risk_loading_pipeline,
            version=opt.Newest,
        ),
    ]

    order_optimal_portfolio(objective=objective, constraints=constraints)
 
def preprocess(a):
    """Winsorize the tails at WIN_LIMIT, then z-score (zero mean, unit variance)."""
    clipped = winsorize(a, limits=(WIN_LIMIT, WIN_LIMIT))
    return preprocessing.scale(clipped)
There was a runtime error.
3 responses

@Grant,
Looks good to me...is clean...and is pretty much the overall structure we are using. Thanks for publishing your template!

The only thing I'd add is an enhancement to do everything you are doing, but inside sectors or other types of clusters, on the hope that would allow a focus more on signal and less on noise. Grouping smaller amounts of assets together that have a common thread will allow for an overall reduction of computational power needed to use more sophisticated factor computations.

The economic thesis that I see here is an implicit assumption that your combined alpha factor uses top vs. bottom assets as an arbitrage that produces positive alpha over all time and all conditions. Even with sector/cluster confinement, this is a tall task.
We've been looking at getting arbitrage inside sectors/clusters, with more risk_on/risk_off regime signals.
No overall success yet...of course, as soon as we get success, we'll go dark!...grin...
alan

Thanks Alan -

I suppose you are saying run the factors independently on each of the 11 risk model sectors (see https://www.quantopian.com/papers/risk), using Pipeline masking?

For example, my first factor would be:

    combined_alpha_materials = None  
    for name, f in factors.iteritems():  
        if combined_alpha_materials == None:  
            combined_alpha_materials = f(mask=universe_materials)  
        else:  
            combined_alpha_materials += f(mask=universe_materials)  

Would I then sum over all 11 combined_alpha terms, yielding the final combined_alpha?

And then apply:

    longs = combined_alpha.top(NUM_LONG_POSITIONS)  
    shorts = combined_alpha.bottom(NUM_SHORT_POSITIONS)  
    long_short_screen = (longs | shorts)  
    pipe = Pipeline(columns = {  
        'combined_alpha':combined_alpha,  
    },  
    screen = long_short_screen)  
    return pipe  

Sounds relatively straightforward to code.

One thought would be to use the sector ETFs for volatility weighting of the factors.

Any that are importable can be experimented with like this

from quantopian.pipeline.experimental import BasicMaterials, CommunicationServices, ConsumerCyclical, ConsumerDefensive, Energy, FinancialServices, HealthCare, Industrials, Momentum, RealEstate, ShortTermReversal, Size, Technology, Utilities, Value, Volatility

                                      # alone but only one week just for illustration  
    bmt = BasicMaterials()            # -  .14  
    com = CommunicationServices()     # - 1.1  
    cyc = ConsumerCyclical()          # - 1.5  
    cdf = ConsumerDefensive()         #   2.5  
    eng = Energy()                    #    .48  
    fin = FinancialServices()         # - 1.2  
    hlt = HealthCare()                # - .46  
    ind = Industrials()               # - .72  
    mom = Momentum()                  # -1.16  
    rst = RealEstate()                #   .86  
    siz = Size()                      #   .02  
    srv = ShortTermReversal()         # - .25  
    tec = Technology()                # -1.1  
    utl = Utilities()                 #  1.1  
    val = Value()                     #   .64  
    vlt = Volatility()                # -1.2  
Clone Algorithm
9
Loading...
Backtest from to with initial capital
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
# https://www.quantopian.com/posts/multi-factor-example-algo

from quantopian.algorithm import attach_pipeline, pipeline_output, order_optimal_portfolio
from quantopian.pipeline  import Pipeline
from quantopian.pipeline.data import Fundamentals
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.experimental import risk_loading_pipeline
from quantopian.pipeline.factors import CustomFactor, Returns,AnnualizedVolatility
from quantopian.pipeline.filters import QTradableStocksUS
from scipy.stats.mstats import winsorize
from sklearn import preprocessing
import quantopian.optimize as opt
import numpy  as np
import pandas as pd

from quantopian.pipeline.experimental import BasicMaterials, CommunicationServices, ConsumerCyclical, ConsumerDefensive, Energy, FinancialServices, HealthCare, Industrials, Momentum, RealEstate, ShortTermReversal, Size, Technology, Utilities, Value, Volatility 

NUM_POSITIONS = 200     # target name count on each side (long and short)
WIN_LIMIT     = 0.005   # factor preprocess winsorize limit

def make_pipeline():
    """Build the daily pipeline: a ranked composite alpha from selected risk
    factors, screened to the top/bottom NUM_POSITIONS names.

    NOTE(review): the `m &=` lines narrow the universe *sequentially*, so each
    factor's percentile screen is applied on the universe left by the previous
    one — reordering these lines changes the result.
    The inline numbers (e.g. "# - .14") look like one-week returns from
    single-factor experiments — TODO confirm with the author.
    """
    lo = 5 ; hi = 100 - lo
    m  = QTradableStocksUS()    # mask
    #m &= AnnualizedVolatility(mask=m).percentile_between(35, 65, mask=m)

    # Custom factors: z-score then rank, and trim each factor's extreme
    # percentiles out of the universe.
    # try zscore instead of rank or both vs neither
    #vol = VOLATILITY(mask=m).zscore().rank(mask=m) ; m &= (vol.percentile_between(lo, hi))    #   .03
    yld = EARN_YIELD(mask=m).zscore().rank(mask=m) ; m &= (yld.percentile_between(lo, hi))    #   .27
    fcf = FCF       (mask=m).zscore().rank(mask=m) ; m &= (fcf.percentile_between(lo, hi))    # - .26
    rev = MEAN_REV  (mask=m).zscore().rank(mask=m) ; m &= (rev.percentile_between(lo, hi))    # - .17

    # Quantopian risk-model style/sector factors, instantiated for use as
    # alpha ingredients and/or pipeline columns.
                                      # alone but only one week just for illustration
    bmt = BasicMaterials()            # -  .14
    com = CommunicationServices()     # - 1.1
    cyc = ConsumerCyclical()          # - 1.5
    cdf = ConsumerDefensive()         #   2.5
    eng = Energy()                    #    .48
    fin = FinancialServices()         # - 1.2
    hlt = HealthCare()                # - .46
    ind = Industrials()               # - .72
    mom = Momentum()                  # -1.16
    rst = RealEstate()                #   .86
    siz = Size()                      #   .02
    srv = ShortTermReversal()         # - .25
    tec = Technology()                # -1.1
    utl = Utilities()                 #  1.1
    val = Value()                     #   .64
    vlt = Volatility()                # -1.2
    
    # Composite alpha: currently ConsumerDefensive + RealEstate + Value.
    # try minus in front or comment some out. try one at a time. add some.
    #alpha  = -vol
    #alpha  = -cyc
    alpha  = cdf
    alpha += rst
    alpha += val
    
    #alpha = rev   # alone, override previous
    
    # Rank the composite so downstream top/bottom selection is scale-free.
    alpha  = alpha.rank()

    # Screen to the long/short candidate set.
    m &= ( alpha.top(NUM_POSITIONS) | alpha.bottom(NUM_POSITIONS) )
    #m &= alpha.percentile_between(lo, hi)

    return Pipeline(
        screen  = m,
        columns = {
            'alpha'    : alpha,
            'MeanRev'  : rev,
            'FCF'      : fcf,
            'Yld'      : yld,
            #'Vol'      : vol,
            
            'bmt' : bmt,
            'com' : com,
            'cyc' : cyc,
            'def' : cdf,
            'eng' : eng,
            'fin' : fin,
            'hlt' : hlt,
            'ind' : ind,
            'mom' : mom,
            'rst' : rst,
            'siz' : siz,
            'str' : srv,
            'tec' : tec,
            'utl' : utl,
            'val' : val,
            'vlt' : vlt,
        }
    )

def initialize(context):
    """One-time setup: attach the alpha and risk pipelines, schedule the daily trade."""
    attach_pipeline(make_pipeline(), 'pipe')
    attach_pipeline(risk_loading_pipeline(), 'risk_pipe')

    # Trade shortly after the open each trading day.
    schedule_function(func=trade,
                      date_rule=date_rules.every_day(),
                      time_rule=time_rules.market_open(minutes=6))

def trade(context, data):
    """Demean and normalize today's alpha, then rebalance via the optimizer.

    Reads context.out (pipeline output cached in before_trading_start).
    """
    # demean and normalize
    alpha = context.out.alpha  #.rank()
    alpha = alpha - alpha.mean()
    alpha = alpha / alpha.abs().sum()
    alpha = alpha.dropna()

    # BUG FIX: guard the empty case (empty pipeline output or an all-NaN day),
    # which previously raised ZeroDivisionError on 1.0 / len(alpha).
    if len(alpha) == 0:
        return

    # Symmetric per-name cap: equal-weight bound across surviving names.
    conc = 1.0 / len(alpha)

    order_optimal_portfolio(
        #objective   = opt.TargetWeights(-alpha),
        objective   = opt.MaximizeAlpha(alpha),
        constraints = [
            opt.MaxGrossExposure(1.0),
            opt.DollarNeutral(),
            opt.PositionConcentration.with_equal_bounds(min = -conc, max = conc),
            #opt.experimental.RiskModelExposure(context.risk_loading_pipeline, version=opt.Newest),
        ]
    )

def before_trading_start(context, data):
    """Cache pipeline outputs on the context and record daily bookkeeping.

    NOTE(review): `'log_data_done' not in context` relies on Quantopian's
    AlgorithmContext supporting membership tests — confirm on-platform.
    log_data() sets context.log_data_done, so the dump runs only once.
    """
    context.out = pipeline_output('pipe')
    # Risk loadings are used by the (currently commented-out) RiskModelExposure
    # constraint in trade(); NaN rows are dropped up front.
    context.risk_loading_pipeline = pipeline_output('risk_pipe').dropna()

    record(num_positions = len(context.portfolio.positions))
    record(leverage      = context.account.leverage)

    if 'log_data_done' not in context:    # show values once
        log_data(context, data, context.out, 4)  # all fields (columns) if unspecified
        #log_data(context, data, context.out, 4, fields=['alpha', 'beta', ... or whatever you say])

class FCF(CustomFactor):
    """Free-cash-flow yield: mean of the two most recent daily values, preprocessed."""
    inputs = [Fundamentals.fcf_yield]
    window_length = 21

    def compute(self, today, assets, out, fcf_yield):
        filled = nanfill(fcf_yield)
        recent = np.mean(filled[-2:], axis=0)
        out[:] = preprocess(recent)

class EARN_YIELD(CustomFactor):
    """Earnings yield: mean of the two most recent daily values, NaNs zeroed, preprocessed."""
    inputs = [Fundamentals.earning_yield]
    window_length = 21

    def compute(self, today, assets, out, earn_yield):
        filled = nanfill(earn_yield)
        recent = np.mean(filled[-2:], axis=0)
        out[:] = preprocess(np.nan_to_num(recent))

class VOLATILITY(CustomFactor):
    """Negated intraday range (high-low)/close over the window; calm names score high."""
    inputs = [USEquityPricing.high, USEquityPricing.low, USEquityPricing.close]
    window_length = 1

    def compute(self, today, assets, out, high, low, close):
        high = nanfill(high)
        low = nanfill(low)
        close = nanfill(close)
        spread = (high - low) / close
        out[:] = preprocess(-np.nansum(spread, axis=0))

class MEAN_REV(CustomFactor):
    """Weighted mean-reversion score over lookbacks of 10..window_length days."""
    inputs = [USEquityPricing.open,USEquityPricing.high,USEquityPricing.low,USEquityPricing.close]
    window_length = 30

    def compute(self, today, assets, out, open, high, low, close):
        high  = nanfill(high)
        low   = nanfill(low)
        close = nanfill(close)

        # Typical price per bar.
        p = (open + high + low + close)/4

        n_days = p.shape[0]     # rows available (== window_length)
        n_assets = p.shape[1]

        # BUG FIXES vs. the original:
        #  * np.nanmean(p[-k:,:]) lacked axis=0, collapsing each slice to a
        #    single scalar instead of a per-asset mean;
        #  * the accumulator was clobbered each pass (`b = x; b += wt*b`),
        #    so only the final lookback survived;
        #  * the loop bound used the asset count (len(close[0,:])), not the
        #    day count, re-adding identical terms past window_length.

        # "b" leg: mean(price)/last(price), each lookback weighted by its
        # cross-sectional sum.
        b = np.zeros(n_assets)
        w = np.zeros(n_assets)
        for k in range(10, n_days + 1):
            x = np.nanmean(p[-k:, :], axis=0) / p[-1, :]
            wt = np.nansum(x)
            b += wt * x
            w += wt
        b = b / w

        # "a" leg: last(price)/mean(price), same weighting scheme.
        a = np.zeros(n_assets)
        w = np.zeros(n_assets)
        for k in range(10, n_days + 1):
            x = p[-1, :] / np.nanmean(p[-k:, :], axis=0)
            wt = np.nansum(x)
            a += wt * x
            w += wt
        a = a / w

        # Keep only the above-average side of each leg, then score the spread.
        a = preprocess(a)
        a[a<0] = 0
        b = preprocess(b)
        b[b<0] = 0

        out[:] = preprocess(b - a)

def preprocess(a):
    """Winsorize the tails at WIN_LIMIT, then z-score (zero mean, unit variance)."""
    trimmed = winsorize(a, limits=(WIN_LIMIT, WIN_LIMIT))
    return preprocessing.scale(trimmed)

def log_data(context, data, z, num, fields=None):
    ''' Log info about pipeline output or, z can be any DataFrame or Series
    https://www.quantopian.com/posts/overview-of-pipeline-content-easy-to-add-to-your-backtest

    Logs min/mean/max and NaN counts per column, then (optionally) the `num`
    highest and lowest rows for each column in `fields` (all columns if None).
    '''
    if 'log_init_done' not in context:  # {:,} magic for adding commas
        log.info('${:,}    {} to {}'.format(int(context.portfolio.starting_cash),
                get_environment('start').date(), get_environment('end').date()))
        # BUG FIX: the guard tests log_init_done, but the original only ever
        # set log_data_done, so this header would repeat on any later call.
        # Set both (before_trading_start checks log_data_done).
        context.log_init_done = 1
        context.log_data_done = 1

    if not len(z):
        log.info('Empty')
        return

    # Options
    log_nan_only = 0          # Only log if nans are present
    show_sectors = 0          # If sectors, do you want to see them or not
    show_sorted_details = 1   # [num] high & low securities sorted, each column
    padmax = 6                # num characters for each field, starting point

    # Series ......
    if 'Series' in str(type(z)):    # is Series, not DataFrame
        nan_count = len(z[z != z])  # NaN != NaN, so this counts NaNs
        nan_count = 'NaNs {}/{}'.format(nan_count, len(z)) if nan_count else ''
        if (log_nan_only and nan_count) or not log_nan_only:
            pad = max( padmax, len('%.5f' % z.max()) )
            log.info('{}{}{}   Series  len {}'.format('min'.rjust(pad+5),
                'mean'.rjust(pad+5), 'max'.rjust(pad+5), len(z)))
            log.info('{}{}{} {}'.format(
                ('%.5f' % z.min()) .rjust(pad+5),
                ('%.5f' % z.mean()).rjust(pad+5),
                ('%.5f' % z.max()) .rjust(pad+5),
                nan_count
            ))
            log.info('High\n{}'.format(z.sort_values(ascending=False).head(num)))
            log.info('Low\n{}' .format(z.sort_values(ascending=False).tail(num)))
        return

    # DataFrame ......
    content_min_max = [ ['','min','mean','max',''] ] ; content = ''
    for col in z.columns:
        try: z[col].max()
        except Exception: continue   # skip non-numeric columns
        if col == 'sector' and not show_sectors: continue
        nan_count = len(z[col][z[col] != z[col]])
        nan_count = 'NaNs {}/{}'.format(nan_count, len(z)) if nan_count else ''
        padmax    = max( padmax, len(str(z[col].max())) )
        content_min_max.append([col, str(z[col] .min()), str(z[col].mean()), str(z[col] .max()), nan_count])
    # NOTE(review): nan_count below reflects only the *last* numeric column;
    # harmless while log_nan_only is 0. Preserved as-is.
    if log_nan_only and nan_count or not log_nan_only:
        content = 'Rows: {}  Columns: {}'.format(z.shape[0], z.shape[1])
        if len(z.columns) == 1: content = 'Rows: {}'.format(z.shape[0])

        paddings = [6 for i in range(4)]
        for lst in content_min_max:    # set max lengths
            i = 0
            for val in lst[:4]:    # value in each sub-list
                paddings[i] = max(paddings[i], len(str(val)))
                i += 1
        headr = content_min_max[0]
        content += ('\n{}{}{}{}{}'.format(
             headr[0] .rjust(paddings[0]),
            (headr[1]).rjust(paddings[1]+5),
            (headr[2]).rjust(paddings[2]+5),
            (headr[3]).rjust(paddings[3]+5),
            ''
        ))
        for lst in content_min_max[1:]:    # populate content using max lengths
            content += ('\n{}{}{}{}     {}'.format(
                lst[0].rjust(paddings[0]),
                lst[1].rjust(paddings[1]+5),
                lst[2].rjust(paddings[2]+5),
                lst[3].rjust(paddings[3]+5),
                lst[4],
            ))
        log.info(content)

    if not show_sorted_details: return
    if len(z.columns) == 1:     return     # skip detail if only 1 column
    # BUG FIX: `details` was only assigned when fields was None, raising
    # UnboundLocalError whenever a fields list was actually passed.
    details = z.columns if fields is None else fields
    for detail in details:
        if detail == 'sector' and not show_sectors: continue
        hi = z[details].sort_values(by=detail, ascending=False).head(num)
        lo = z[details].sort_values(by=detail, ascending=False).tail(num)
        content  = ''
        content += ('_ _ _   {}   _ _ _'  .format(detail))
        content += ('\n\t... {} highs\n{}'.format(detail, str(hi)))
        content += ('\n\t... {} lows \n{}'.format(detail, str(lo)))
        if log_nan_only and not len(lo[lo[detail] != lo[detail]]):
            continue  # skip if no nans
        log.info(content)

def nanfill(_in):
    """Forward-fill NaNs along the time axis of a (days, assets) array, in place.

    Adapted from https://stackoverflow.com/questions/41190852

    BUG FIX: the original filled along axis 1 — i.e. *across assets* — so a
    security's NaN was patched with a neighboring security's value.  Pipeline
    inputs are shaped (window_length, n_assets), so the fill must run down
    axis 0 (time), carrying each column's last valid value forward.  Leading
    NaNs (no prior value in the window) are left as NaN.

    Returns the same array object, mutated.
    """
    mask = np.isnan(_in)
    # For each cell, the row index of the most recent non-NaN value in its column.
    idx = np.where(~mask, np.arange(mask.shape[0])[:, None], 0)
    np.maximum.accumulate(idx, axis=0, out=idx)
    _in[mask] = _in[idx[mask], np.nonzero(mask)[1]]
    return _in
There was a runtime error.