Back to Community
The Fama-French 3-factor model doesn't work very well. Did I do something wrong?

Hi, I cloned the code from https://www.quantopian.com/posts/computing-the-fama-french-factors-with-pipeline-1 to test the Fama-French 3-factor model, but the results look terrible.
Did I do anything wrong?
Any suggestions would be appreciated.

Best

Clone Algorithm
40
Loading...
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
import pandas as pd
import numpy as np
from quantopian.algorithm import attach_pipeline, pipeline_output
from quantopian.pipeline import Pipeline
from quantopian.pipeline import CustomFactor
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.data import morningstar
from datetime import datetime
from quantopian.pipeline.filters import Q1500US

import statsmodels.api as sm
from statsmodels import regression


# time frame on which we want to compute Fama-French
normal_days = 31
# approximate the number of trading days in that period
# this is the number of trading days we'll look back on,
# on every trading day.
business_days = int(0.69 * normal_days)


# global em_df

ff_result=[]
date_index=[]

stock_held=15

# em_df=pd.DataFrame()



class Returns(CustomFactor):
    """
    this factor outputs the returns over the period defined by 
    business_days, ending on the previous trading day, for every security.
    """
    window_length = business_days
    inputs = [USEquityPricing.close]
    def compute(self,today,assets,out,price):
        out[:] = (price[-1] - price[0]) / price[0] * 100

class MarketEquity(CustomFactor):
    """
    this factor outputs the market cap of every security on the day.
    """
    window_length = business_days
    inputs = [morningstar.valuation.market_cap]
    def compute(self,today,assets,out,mcap):
        out[:] = mcap[0]

class BookEquity(CustomFactor):
    """
    this factor outputs the book value of every security on the day.
    """
    window_length = business_days
    inputs = [morningstar.balance_sheet.tangible_book_value]
    def compute(self,today,assets,out,book):
        out[:] = book[0]
                                        
class CommonStock(CustomFactor):
    """
    this factor outputs 1.0 for all securities that are either common stock or SPY,
    and outputs 0.0 for all other securities. This is to filter out ETFs and other
    types of share that we do not wish to consider.
    """
    window_length = business_days
    inputs = [morningstar.share_class_reference.is_primary_share]
    def compute(self,today,assets,out, share_class):
        out[:] = ((share_class[-1].astype(bool)) | (assets == 8554)).astype(float)                                     
        
def initialize(context):
    """
    use our factors to add our pipes and screens.
    """
    context.longs=[]


    pipe = Pipeline()
    common_stock = CommonStock()
    # filter down to securities that are either common stock or SPY
    pipe.set_screen(common_stock.eq(1))
    
    # pipe = Pipeline().query(common_stock.eq(1))
    attach_pipeline(pipe, 'ff_example')
    mkt_cap = MarketEquity()
    pipe.add(mkt_cap,'market_cap')
    
    book_equity = BookEquity()
    # book equity over market equity
    be_me = book_equity/mkt_cap
    pipe.add(be_me,'be_me')

    returns = Returns()
    pipe.add(returns,'returns')
    
    
    
        
    pipe2= attach_pipeline(Pipeline(), 'my_pipeline')

    my_factor = Returns(mask=Q1500US())
    pipe2.set_screen(Q1500US())
    pipe2.add(my_factor, 'my_pipeline')
    
    
    
    schedule_function(func=print_fama_french, date_rule=date_rules.every_day())
    schedule_function(my_rebalance, date_rules.week_start())
    schedule_function(func=open_positions,date_rule=date_rules.every_day(),time_rule=time_rules.market_open(hours=0,minutes=1))
    
    
def open_positions(context,data):
    long_list=context.longs
    log.info(context.longs)
    if len(long_list)==0:
        return
    port = context.portfolio.positions

    for sec in port:
        if port not in long_list and data.can_trade(sec):
            order_target_percent(sec,0)
    for sec in long_list:
        if data.can_trade(sec):
           order_target_percent(sec,1.0/len(long_list))

def print_fama_french(context, data):
    # print the Fama-French factors for the period defined by business_days
    # ending on the previous trading day.
    date_index.append(get_datetime())
    ff_result.append([context.rm_rf, context.smb, context.hml])
    
def before_trading_start(context,data):
    """
    every trading day, we use our pipes to construct the Fama-French
    portfolios, and then calculate the Fama-French factors appropriately.
    """
    spy = sid(8554)
    
    factors = pipeline_output('ff_example')
    context.stocks=factors.index
    # get the data we're going to use
    returns = factors['returns']
    mkt_cap = factors.sort(['market_cap'], ascending=True)
    be_me = factors.sort(['be_me'], ascending=True)
    
    # to compose the six portfolios, split our universe into portions
    half = int(len(mkt_cap)*0.5)
    small_caps = mkt_cap[:half]
    big_caps = mkt_cap[half:]
    
    thirty = int(len(be_me)*0.3)
    seventy = int(len(be_me)*0.7)
    growth = be_me[:thirty]
    neutral = be_me[thirty:seventy]
    value = be_me[seventy:]
    
    # now use the portions to construct the portfolios.
    # note: these portfolios are just lists (indices) of equities
    small_value = small_caps.index.intersection(value.index)
    small_neutral = small_caps.index.intersection(neutral.index)
    small_growth = small_caps.index.intersection(growth.index)
    
    big_value = big_caps.index.intersection(value.index)
    big_neutral = big_caps.index.intersection(neutral.index)
    big_growth = big_caps.index.intersection(growth.index)
    
    # take the mean to get the portfolio return, assuming uniform
    # allocation to its constituent equities.
    sv = returns[small_value].mean()
    sn = returns[small_neutral].mean()
    sg = returns[small_growth].mean()
    
    bv = returns[big_value].mean()
    bn = returns[big_neutral].mean()
    bg = returns[big_growth].mean()
    
    # computing Rm-Rf (Market Returns - Risk-Free Returns). we take the 
    # rate of risk-free returns to be zero, so this is simply SPY's returns.
    # have to set an initial dummy value
    context.rm_rf = float('nan')
    if spy in returns.index:
        context.rm_rf = returns.loc[spy]
    
    # computing SMB
    context.smb = (sv + sn + sg)/3 - (bv + bn + bg)/3
    
    # computing HML
    context.hml = (sv + bv)/2 - (sg + bg)/2
    
    # print([context.rm_rf,context.smb,context.hml])
    # ff_result.append([context.rm_rf,context.smb,context.hml])
    
    
    
def my_rebalance(context,data):
    stocks_longs=[]
    ff_df=pd.DataFrame(ff_result,index=date_index,columns=['rf','smb','hml'])

    my_pipe=pipeline_output('my_pipeline')
    his_data=data.history(my_pipe.index,'price',len(date_index),'1d')
    df4=np.diff(np.log(his_data),axis=0)+0*his_data[1:]

    # print(ff_df[1:].shape)
    # print(df4.shape)
    
    stock_alpha={}
    for sec in df4.columns:
        t_r=linreg(ff_df[1:],df4[sec])
        if(np.isnan(t_r[0])):
            return
        stock_alpha[sec]=[t_r[0]]
    for key, value in sorted(stock_alpha.iteritems(), key=lambda (k,v): (v,k)):
        # print ("%s: %s" % (key, value))
        stocks_longs.append(key)
    # scores=pd.DataFrame(stock_alpha)
    # print(scores.head())
    # print(stocks_longs[0:15])
    context.longs=stocks_longs[0:15]
# #consider kdj indicator    
#     tradable_position = []
#     for sec in stocks_longs:
#         price_history = data.history(
#             sec,
#             fields=['close','high','low'],
#             bar_count=120,
#             frequency='1d'
#         )
#         if price_history.iloc[0][0] is None:
#             continue
#         rsv = np.arange(0,12,0.1)
#         for j in range(119,10,-1):
#             lowest = 1000.0
#             highest = -1.0
#             for i in range(j,j-9,-1) :
#                 if price_history.iloc[i][2] < lowest:
#                     lowest = price_history.iloc[i][2]
#                 if price_history.iloc[i][1] > highest:
#                     highest = price_history.iloc[i][1]
#             rsv[j] = 100*(price_history.iloc[j][0] - lowest) / (highest - lowest)
#         kline = np.arange(0,12,0.1)
#         for i in range(11,120,1):
#             if i == 11:
#                 kline[i] = (rsv[i]+rsv[i-1]+rsv[i-2])/3
#             else :
#                 kline[i] = rsv[i]/3 + 2*kline[i-1]/3
    
#         dline = np.arange(0,12,0.1)
#         for i in range(11,120,1):
#             if i == 11:
#                 dline[i] = (kline[i]+kline[i-1]+kline[i-2])/3
#             else :
#                 dline[i] = kline[i]/3 + 2*dline[i-1]/3

#         if data.can_trade(sec) and kline[119] < dline[119]+10 and kline[119] > dline[119]-10 and kline[119] >= kline[117] and kline[117] < dline[117] and dline[119] <= 40:
#             tradable_position.append(sec)
#     if len(tradable_position) != 0:
#         weights = 1.0/len(tradable_position)
#         for sec in tradable_position:
#                 order_target_percent(sec, weights)
#                 log.info(str(sec)+' '+str(weights))
#     log.info(len(tradable_position))
#     log.info('stock value: '+str(context.portfolio.positions_value))
#     log.info('cash: '+str(context.portfolio.cash))
#     log.info(len(my_positions))
#     context.longs=tradable_position[0:15]
    
    
    
    
    
    
    
    
    
    
    
    # print(context.longs)
    
    # print(his_data.head())
    
    
    # print(ff_df.head())
    
def linreg(X,Y,columns=3):
    X=sm.add_constant(np.array(X))
    Y=np.array(Y)
    if len(Y)>business_days:
        results = regression.linear_model.OLS(Y, X).fit()
        return results.params
    else:
        return [float("nan")]*(columns+1)    
There was a runtime error.
1 response

The backtest result seems very slow to load, so I am attaching another one here.

Clone Algorithm
40
Loading...
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
import pandas as pd
import numpy as np
from quantopian.algorithm import attach_pipeline, pipeline_output
from quantopian.pipeline import Pipeline
from quantopian.pipeline import CustomFactor
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.data import morningstar
from datetime import datetime
from quantopian.pipeline.filters import Q1500US

import statsmodels.api as sm
from statsmodels import regression


# time frame on which we want to compute Fama-French
normal_days = 31
# approximate the number of trading days in that period
# this is the number of trading days we'll look back on,
# on every trading day.
business_days = int(0.69 * normal_days)


# global em_df

ff_result=[]
date_index=[]

stock_held=15

# em_df=pd.DataFrame()



class Returns(CustomFactor):
    """
    this factor outputs the returns over the period defined by 
    business_days, ending on the previous trading day, for every security.
    """
    window_length = business_days
    inputs = [USEquityPricing.close]
    def compute(self,today,assets,out,price):
        out[:] = (price[-1] - price[0]) / price[0] * 100

class MarketEquity(CustomFactor):
    """
    this factor outputs the market cap of every security on the day.
    """
    window_length = business_days
    inputs = [morningstar.valuation.market_cap]
    def compute(self,today,assets,out,mcap):
        out[:] = mcap[0]

class BookEquity(CustomFactor):
    """
    this factor outputs the book value of every security on the day.
    """
    window_length = business_days
    inputs = [morningstar.balance_sheet.tangible_book_value]
    def compute(self,today,assets,out,book):
        out[:] = book[0]
                                        
class CommonStock(CustomFactor):
    """
    this factor outputs 1.0 for all securities that are either common stock or SPY,
    and outputs 0.0 for all other securities. This is to filter out ETFs and other
    types of share that we do not wish to consider.
    """
    window_length = business_days
    inputs = [morningstar.share_class_reference.is_primary_share]
    def compute(self,today,assets,out, share_class):
        out[:] = ((share_class[-1].astype(bool)) | (assets == 8554)).astype(float)                                     
        
def initialize(context):
    """
    use our factors to add our pipes and screens.
    """
    context.longs=[]


    pipe = Pipeline()
    common_stock = CommonStock()
    # filter down to securities that are either common stock or SPY
    pipe.set_screen(common_stock.eq(1))
    
    # pipe = Pipeline().query(common_stock.eq(1))
    attach_pipeline(pipe, 'ff_example')
    mkt_cap = MarketEquity()
    pipe.add(mkt_cap,'market_cap')
    
    book_equity = BookEquity()
    # book equity over market equity
    be_me = book_equity/mkt_cap
    pipe.add(be_me,'be_me')

    returns = Returns()
    pipe.add(returns,'returns')
    
    
    
        
    pipe2= attach_pipeline(Pipeline(), 'my_pipeline')

    my_factor = Returns(mask=Q1500US())
    pipe2.set_screen(Q1500US())
    pipe2.add(my_factor, 'my_pipeline')
    
    
    
    schedule_function(func=print_fama_french, date_rule=date_rules.every_day())
    schedule_function(my_rebalance, date_rules.week_start())
    schedule_function(func=open_positions,date_rule=date_rules.every_day(),time_rule=time_rules.market_open(hours=0,minutes=1))
    
    
def open_positions(context,data):
    long_list=context.longs
    log.info(context.longs)
    if len(long_list)==0:
        return
    port = context.portfolio.positions

    for sec in port:
        if port not in long_list and data.can_trade(sec):
            order_target_percent(sec,0)
    for sec in long_list:
        if data.can_trade(sec):
           order_target_percent(sec,1.0/len(long_list))

def print_fama_french(context, data):
    # print the Fama-French factors for the period defined by business_days
    # ending on the previous trading day.
    date_index.append(get_datetime())
    ff_result.append([context.rm_rf, context.smb, context.hml])
    
def before_trading_start(context,data):
    """
    every trading day, we use our pipes to construct the Fama-French
    portfolios, and then calculate the Fama-French factors appropriately.
    """
    spy = sid(8554)
    
    factors = pipeline_output('ff_example')
    context.stocks=factors.index
    # get the data we're going to use
    returns = factors['returns']
    mkt_cap = factors.sort(['market_cap'], ascending=True)
    be_me = factors.sort(['be_me'], ascending=True)
    
    # to compose the six portfolios, split our universe into portions
    half = int(len(mkt_cap)*0.5)
    small_caps = mkt_cap[:half]
    big_caps = mkt_cap[half:]
    
    thirty = int(len(be_me)*0.3)
    seventy = int(len(be_me)*0.7)
    growth = be_me[:thirty]
    neutral = be_me[thirty:seventy]
    value = be_me[seventy:]
    
    # now use the portions to construct the portfolios.
    # note: these portfolios are just lists (indices) of equities
    small_value = small_caps.index.intersection(value.index)
    small_neutral = small_caps.index.intersection(neutral.index)
    small_growth = small_caps.index.intersection(growth.index)
    
    big_value = big_caps.index.intersection(value.index)
    big_neutral = big_caps.index.intersection(neutral.index)
    big_growth = big_caps.index.intersection(growth.index)
    
    # take the mean to get the portfolio return, assuming uniform
    # allocation to its constituent equities.
    sv = returns[small_value].mean()
    sn = returns[small_neutral].mean()
    sg = returns[small_growth].mean()
    
    bv = returns[big_value].mean()
    bn = returns[big_neutral].mean()
    bg = returns[big_growth].mean()
    
    # computing Rm-Rf (Market Returns - Risk-Free Returns). we take the 
    # rate of risk-free returns to be zero, so this is simply SPY's returns.
    # have to set an initial dummy value
    context.rm_rf = float('nan')
    if spy in returns.index:
        context.rm_rf = returns.loc[spy]
    
    # computing SMB
    context.smb = (sv + sn + sg)/3 - (bv + bn + bg)/3
    
    # computing HML
    context.hml = (sv + bv)/2 - (sg + bg)/2
    
    # print([context.rm_rf,context.smb,context.hml])
    # ff_result.append([context.rm_rf,context.smb,context.hml])
    
    
    
def my_rebalance(context,data):
    stocks_longs=[]
    ff_df=pd.DataFrame(ff_result,index=date_index,columns=['rf','smb','hml'])

    my_pipe=pipeline_output('my_pipeline')
    his_data=data.history(my_pipe.index,'price',len(date_index),'1d')
    df4=np.diff(np.log(his_data),axis=0)+0*his_data[1:]

    # print(ff_df[1:].shape)
    # print(df4.shape)
    
    stock_alpha={}
    for sec in df4.columns:
        t_r=linreg(ff_df[1:],df4[sec])
        if(np.isnan(t_r[0])):
            return
        stock_alpha[sec]=[t_r[0]]
    for key, value in sorted(stock_alpha.iteritems(), key=lambda (k,v): (v,k)):
        # print ("%s: %s" % (key, value))
        stocks_longs.append(key)
    # scores=pd.DataFrame(stock_alpha)
    # print(scores.head())
    # print(stocks_longs[0:15])
    context.longs=stocks_longs[0:15]
# #consider kdj indicator    
#     tradable_position = []
#     for sec in stocks_longs:
#         price_history = data.history(
#             sec,
#             fields=['close','high','low'],
#             bar_count=120,
#             frequency='1d'
#         )
#         if price_history.iloc[0][0] is None:
#             continue
#         rsv = np.arange(0,12,0.1)
#         for j in range(119,10,-1):
#             lowest = 1000.0
#             highest = -1.0
#             for i in range(j,j-9,-1) :
#                 if price_history.iloc[i][2] < lowest:
#                     lowest = price_history.iloc[i][2]
#                 if price_history.iloc[i][1] > highest:
#                     highest = price_history.iloc[i][1]
#             rsv[j] = 100*(price_history.iloc[j][0] - lowest) / (highest - lowest)
#         kline = np.arange(0,12,0.1)
#         for i in range(11,120,1):
#             if i == 11:
#                 kline[i] = (rsv[i]+rsv[i-1]+rsv[i-2])/3
#             else :
#                 kline[i] = rsv[i]/3 + 2*kline[i-1]/3
    
#         dline = np.arange(0,12,0.1)
#         for i in range(11,120,1):
#             if i == 11:
#                 dline[i] = (kline[i]+kline[i-1]+kline[i-2])/3
#             else :
#                 dline[i] = kline[i]/3 + 2*dline[i-1]/3

#         if data.can_trade(sec) and kline[119] < dline[119]+10 and kline[119] > dline[119]-10 and kline[119] >= kline[117] and kline[117] < dline[117] and dline[119] <= 40:
#             tradable_position.append(sec)
#     if len(tradable_position) != 0:
#         weights = 1.0/len(tradable_position)
#         for sec in tradable_position:
#                 order_target_percent(sec, weights)
#                 log.info(str(sec)+' '+str(weights))
#     log.info(len(tradable_position))
#     log.info('stock value: '+str(context.portfolio.positions_value))
#     log.info('cash: '+str(context.portfolio.cash))
#     log.info(len(my_positions))
#     context.longs=tradable_position[0:15]
    
    
    
    
    
    
    
    
    
    
    
    # print(context.longs)
    
    # print(his_data.head())
    
    
    # print(ff_df.head())
    
def linreg(X,Y,columns=3):
    X=sm.add_constant(np.array(X))
    Y=np.array(Y)
    if len(Y)>business_days:
        results = regression.linear_model.OLS(Y, X).fit()
        return results.params
    else:
        return [float("nan")]*(columns+1)    
There was a runtime error.