Back to Community
Inconsistent backtest results with identical code.

I've run 3 backtests on the same piece of code on the same dates. Returns range from 10% to 200%. I'm not sure what is causing the discrepancies since the algorithm is quite simple and does not rely imported data from outside quantopian that can be updated/edited between backtests. I am just using price data to track the volatility of GLD and trade based on the price returns of VXX.

The code contains some variables and such that aren't used, as I was still experimenting with the code to find any possible leads. In addition, there is a large block of code commented out at the bottom from an example algo that I was using as a template for machine learning. These parts are identical across all 3 backtests, but I just wanted to clarify in case someone got confused with my code and what I was planning.

Clone Algorithm
4
Loading...
Backtest from to with initial capital
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn import preprocessing
from collections import Counter
import numpy 
import math


def initialize(context):
    context.assets = [sid(26807)]
                    
                      
    context.VXX = sid(38054)
    context.stocks = symbols('XLY',  # XLY Consumer Discrectionary SPDR Fund   
                           'XLF',  # XLF Financial SPDR Fund  
                           'XLK',  # XLK Technology SPDR Fund  
                           'XLE',  # XLE Energy SPDR Fund  
                           'XLV',  # XLV Health Care SPRD Fund  
                           'XLI',  # XLI Industrial SPDR Fund  
                           'XLP',  # XLP Consumer Staples SPDR Fund   
                           'XLB',  # XLB Materials SPDR Fund  
                           'XLU')  # XLU Utilities SPRD Fund
    
    context.historical_bars = 200
    context.feature_window = 10
    context.long_range = 50 #the long range which we compare volatilities for
    context.short_range = 20
    context.buy = []
    context.p =0
    schedule_function(
        func=buying,
        date_rule=date_rules.every_day(),
        time_rule=time_rules.market_open(minutes=45),
        half_days=True
  )

def before_trading_start(context,data):
    
    context.buy = []
    VXX_price_list = data.history(context.VXX, "price", context.historical_bars - context.long_range, "1d")
    VXZ_price_list = data.history(sid(38055), "price", context.historical_bars - context.long_range, "1d")
   
    
    
    for stock in context.assets:
        
        price_list = data.history(stock, "price", context.historical_bars, "1d").tolist()
        log = []
        X = []
        y = []
        log_returns = []
        feature = []
        
        #produces logged returns of the last 150 trading days
        for i in range (1, len(price_list)):
            log_returns.append( math.log( (price_list[i]) / (price_list[i-1]) ) )
            
        #look at the std/volatility of the last 50 and 20 trading days. Take their ratio and append it to feature.    
        for i in range (0, context.historical_bars - context.long_range):
            log = log_returns[0+i : context.long_range+i]
            long_vol = numpy.std(log)
            short_vol = numpy.std(log[-context.short_range:])
            feature.append(long_vol / short_vol) 
        
        start_bar = context.feature_window
        bar = start_bar
        
        #We break feature into feature windows. 
        while bar < len(feature)-2:
            #making our labels
            """end_price = VXX_price_list[bar+1]
            begin_price = VXX_price_list[bar]
            
            if end_price > begin_price:
                label = 1
            else:
                label = -1"""
            difference = ((VXX_price_list[bar+2]-VXX_price_list[bar])/(VXX_price_list[bar]))
            if difference > .02:
                label = 1
            elif difference <-.02: 
                label = -1
            else:
                label = 0
                
            window = []
            xx = 0
            
            for _ in range(context.feature_window):
                feat = feature[bar-(context.feature_window-xx)]
                window.append(feat)
                xx += 1
            X.append(window)
            y.append(label)
            bar+=1
        
        
        clf = RandomForestClassifier()

        last_prices = feature[-context.feature_window:]

        X.append(last_prices)
        X = preprocessing.scale(X)

        current_features = X[-1]
        X = X[:-1]

        clf.fit(X,y)
        context.p = clf.predict(current_features)[0]
        if context.p == 1:
            context.buy.append(context.p)
         

        print(('Prediction',context.p))
            
def buying (context,data):
    if context.p == 1:
        order_target_percent(sid(38054),1)
        #order_target_percent(sid(38055),-.5)
    
    elif context.p == -1: 
        order_target_percent(sid(38054),-1)
    else:
        order_target_percent(sid(38054),0)
        
        #order_target_percent(sid(38055),.5)
    """else: 
        order_target_percent(sid(14516),0)
        order_target_percent(sid(38055),0)"""
    """elif len(context.buy) == 2:
        order_target_percent(sid(38054),1)
        order_target_percent(sid(38055),0)
    elif len(context.buy) == 1:
        order_target_percent(sid(38054),.5)
        order_target_percent(sid(38055),-.5)"""
     
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
"""def buying(context, data):
    prices = history(bar_count = context.historical_bars, frequency='1d', field='price')

    for stock in context.stocks:
        try:
            ma1 = data[stock].mavg(50)
            ma2 = data[stock].mavg(200)

            start_bar = context.feature_window
            price_list = prices[stock].tolist()

            X = []
            y = []

            bar = start_bar

            # feature creation
            while bar < len(price_list)-1:
                try:
                    end_price = price_list[bar+1]
                    begin_price = price_list[bar]

                    pricing_list = []
                    xx = 0
                    for _ in range(context.feature_window):
                        price = price_list[bar-(context.feature_window-xx)]
                        pricing_list.append(price)
                        xx += 1

                    features = np.around(np.diff(pricing_list) / pricing_list[:-1] * 100.0, 1)


                    #print(features)

                    if end_price > begin_price:
                        label = 1
                    else:
                        label = -1

                    bar += 1
                    X.append(features)
                    y.append(label)

                except Exception as e:
                    bar += 1
                    print(('feature creation',str(e)))




            clf = RandomForestClassifier()

            last_prices = price_list[-context.feature_window:]
            current_features = np.around(np.diff(last_prices) / last_prices[:-1] * 100.0, 1)

            X.append(current_features)
            X = preprocessing.scale(X)

            current_features = X[-1]
            X = X[:-1]

            clf.fit(X,y)
            p = clf.predict(current_features)[0]

            print(('Prediction',p))
            if p == 1:
                order_target_percent(stock,0.11)
            elif p == -1:
                order_target_percent(stock,-0.11)            

        except Exception as e:
            print(str(e))
            
            
    record('ma1',ma1)
    record('ma2',ma2)
    record('Leverage',context.account.leverage)"""
There was a runtime error.
5 responses

Here is a different backtest

Clone Algorithm
4
Loading...
Backtest from to with initial capital
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn import preprocessing
from collections import Counter
import numpy 
import math


def initialize(context):
    context.assets = [sid(26807)]
                    
                      
    context.VXX = sid(38054)
    context.stocks = symbols('XLY',  # XLY Consumer Discrectionary SPDR Fund   
                           'XLF',  # XLF Financial SPDR Fund  
                           'XLK',  # XLK Technology SPDR Fund  
                           'XLE',  # XLE Energy SPDR Fund  
                           'XLV',  # XLV Health Care SPRD Fund  
                           'XLI',  # XLI Industrial SPDR Fund  
                           'XLP',  # XLP Consumer Staples SPDR Fund   
                           'XLB',  # XLB Materials SPDR Fund  
                           'XLU')  # XLU Utilities SPRD Fund
    
    context.historical_bars = 200
    context.feature_window = 10
    context.long_range = 50 #the long range which we compare volatilities for
    context.short_range = 20
    context.buy = []
    context.p =0
    schedule_function(
        func=buying,
        date_rule=date_rules.every_day(),
        time_rule=time_rules.market_open(minutes=45),
        half_days=True
  )

def before_trading_start(context,data):
    
    context.buy = []
    VXX_price_list = data.history(context.VXX, "price", context.historical_bars - context.long_range, "1d")
    VXZ_price_list = data.history(sid(38055), "price", context.historical_bars - context.long_range, "1d")
   
    
    
    for stock in context.assets:
        
        price_list = data.history(stock, "price", context.historical_bars, "1d").tolist()
        log = []
        X = []
        y = []
        log_returns = []
        feature = []
        
        #produces logged returns of the last 150 trading days
        for i in range (1, len(price_list)):
            log_returns.append( math.log( (price_list[i]) / (price_list[i-1]) ) )
            
        #look at the std/volatility of the last 50 and 20 trading days. Take their ratio and append it to feature.    
        for i in range (0, context.historical_bars - context.long_range):
            log = log_returns[0+i : context.long_range+i]
            long_vol = numpy.std(log)
            short_vol = numpy.std(log[-context.short_range:])
            feature.append(long_vol / short_vol) 
        
        start_bar = context.feature_window
        bar = start_bar
        
        #We break feature into feature windows. 
        while bar < len(feature)-2:
            #making our labels
            """end_price = VXX_price_list[bar+1]
            begin_price = VXX_price_list[bar]
            
            if end_price > begin_price:
                label = 1
            else:
                label = -1"""
            difference = ((VXX_price_list[bar+2]-VXX_price_list[bar])/(VXX_price_list[bar]))
            if difference > .02:
                label = 1
            elif difference <-.02: 
                label = -1
            else:
                label = 0
                
            window = []
            xx = 0
            
            for _ in range(context.feature_window):
                feat = feature[bar-(context.feature_window-xx)]
                window.append(feat)
                xx += 1
            X.append(window)
            y.append(label)
            bar+=1
        
        
        clf = RandomForestClassifier()

        last_prices = feature[-context.feature_window:]

        X.append(last_prices)
        X = preprocessing.scale(X)

        current_features = X[-1]
        X = X[:-1]

        clf.fit(X,y)
        context.p = clf.predict(current_features)[0]
        if context.p == 1:
            context.buy.append(context.p)
         

        print(('Prediction',context.p))
            
def buying (context,data):
    if context.p == 1:
        order_target_percent(sid(38054),1)
        #order_target_percent(sid(38055),-.5)
    
    elif context.p == -1: 
        order_target_percent(sid(38054),-1)
    else:
        order_target_percent(sid(38054),0)
        
        #order_target_percent(sid(38055),.5)
    """else: 
        order_target_percent(sid(14516),0)
        order_target_percent(sid(38055),0)"""
    """elif len(context.buy) == 2:
        order_target_percent(sid(38054),1)
        order_target_percent(sid(38055),0)
    elif len(context.buy) == 1:
        order_target_percent(sid(38054),.5)
        order_target_percent(sid(38055),-.5)"""
     
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
"""def buying(context, data):
    prices = history(bar_count = context.historical_bars, frequency='1d', field='price')

    for stock in context.stocks:
        try:
            ma1 = data[stock].mavg(50)
            ma2 = data[stock].mavg(200)

            start_bar = context.feature_window
            price_list = prices[stock].tolist()

            X = []
            y = []

            bar = start_bar

            # feature creation
            while bar < len(price_list)-1:
                try:
                    end_price = price_list[bar+1]
                    begin_price = price_list[bar]

                    pricing_list = []
                    xx = 0
                    for _ in range(context.feature_window):
                        price = price_list[bar-(context.feature_window-xx)]
                        pricing_list.append(price)
                        xx += 1

                    features = np.around(np.diff(pricing_list) / pricing_list[:-1] * 100.0, 1)


                    #print(features)

                    if end_price > begin_price:
                        label = 1
                    else:
                        label = -1

                    bar += 1
                    X.append(features)
                    y.append(label)

                except Exception as e:
                    bar += 1
                    print(('feature creation',str(e)))




            clf = RandomForestClassifier()

            last_prices = price_list[-context.feature_window:]
            current_features = np.around(np.diff(last_prices) / last_prices[:-1] * 100.0, 1)

            X.append(current_features)
            X = preprocessing.scale(X)

            current_features = X[-1]
            X = X[:-1]

            clf.fit(X,y)
            p = clf.predict(current_features)[0]

            print(('Prediction',p))
            if p == 1:
                order_target_percent(stock,0.11)
            elif p == -1:
                order_target_percent(stock,-0.11)            

        except Exception as e:
            print(str(e))
            
            
    record('ma1',ma1)
    record('ma2',ma2)
    record('Leverage',context.account.leverage)"""
There was a runtime error.

And finally a third. Haven't run anymore backtests, so I'm not sure if doing so will provide another variation.

Clone Algorithm
4
Loading...
Backtest from to with initial capital
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn import preprocessing
from collections import Counter
import numpy 
import math


def initialize(context):
    context.assets = [sid(26807)]
                    
                      
    context.VXX = sid(38054)
    context.stocks = symbols('XLY',  # XLY Consumer Discrectionary SPDR Fund   
                           'XLF',  # XLF Financial SPDR Fund  
                           'XLK',  # XLK Technology SPDR Fund  
                           'XLE',  # XLE Energy SPDR Fund  
                           'XLV',  # XLV Health Care SPRD Fund  
                           'XLI',  # XLI Industrial SPDR Fund  
                           'XLP',  # XLP Consumer Staples SPDR Fund   
                           'XLB',  # XLB Materials SPDR Fund  
                           'XLU')  # XLU Utilities SPRD Fund
    
    context.historical_bars = 200
    context.feature_window = 10
    context.long_range = 50 #the long range which we compare volatilities for
    context.short_range = 20
    context.buy = []
    context.p =0
    schedule_function(
        func=buying,
        date_rule=date_rules.every_day(),
        time_rule=time_rules.market_open(minutes=45),
        half_days=True
  )

def before_trading_start(context,data):
    
    context.buy = []
    VXX_price_list = data.history(context.VXX, "price", context.historical_bars - context.long_range, "1d")
    VXZ_price_list = data.history(sid(38055), "price", context.historical_bars - context.long_range, "1d")
   
    
    
    for stock in context.assets:
        
        price_list = data.history(stock, "price", context.historical_bars, "1d").tolist()
        log = []
        X = []
        y = []
        log_returns = []
        feature = []
        
        #produces logged returns of the last 150 trading days
        for i in range (1, len(price_list)):
            log_returns.append( math.log( (price_list[i]) / (price_list[i-1]) ) )
            
        #look at the std/volatility of the last 50 and 20 trading days. Take their ratio and append it to feature.    
        for i in range (0, context.historical_bars - context.long_range):
            log = log_returns[0+i : context.long_range+i]
            long_vol = numpy.std(log)
            short_vol = numpy.std(log[-context.short_range:])
            feature.append(long_vol / short_vol) 
        
        start_bar = context.feature_window
        bar = start_bar
        
        #We break feature into feature windows. 
        while bar < len(feature)-2:
            #making our labels
            """end_price = VXX_price_list[bar+1]
            begin_price = VXX_price_list[bar]
            
            if end_price > begin_price:
                label = 1
            else:
                label = -1"""
            difference = ((VXX_price_list[bar+2]-VXX_price_list[bar])/(VXX_price_list[bar]))
            if difference > .02:
                label = 1
            elif difference <-.02: 
                label = -1
            else:
                label = 0
                
            window = []
            xx = 0
            
            for _ in range(context.feature_window):
                feat = feature[bar-(context.feature_window-xx)]
                window.append(feat)
                xx += 1
            X.append(window)
            y.append(label)
            bar+=1
        
        
        clf = RandomForestClassifier()

        last_prices = feature[-context.feature_window:]

        X.append(last_prices)
        X = preprocessing.scale(X)

        current_features = X[-1]
        X = X[:-1]

        clf.fit(X,y)
        context.p = clf.predict(current_features)[0]
        if context.p == 1:
            context.buy.append(context.p)
         

        print(('Prediction',context.p))
            
def buying (context,data):
    if context.p == 1:
        order_target_percent(sid(38054),1)
        #order_target_percent(sid(38055),-.5)
    
    elif context.p == -1: 
        order_target_percent(sid(38054),-1)
    else:
        order_target_percent(sid(38054),0)
        
        #order_target_percent(sid(38055),.5)
    """else: 
        order_target_percent(sid(14516),0)
        order_target_percent(sid(38055),0)"""
    """elif len(context.buy) == 2:
        order_target_percent(sid(38054),1)
        order_target_percent(sid(38055),0)
    elif len(context.buy) == 1:
        order_target_percent(sid(38054),.5)
        order_target_percent(sid(38055),-.5)"""
     
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
"""def buying(context, data):
    prices = history(bar_count = context.historical_bars, frequency='1d', field='price')

    for stock in context.stocks:
        try:
            ma1 = data[stock].mavg(50)
            ma2 = data[stock].mavg(200)

            start_bar = context.feature_window
            price_list = prices[stock].tolist()

            X = []
            y = []

            bar = start_bar

            # feature creation
            while bar < len(price_list)-1:
                try:
                    end_price = price_list[bar+1]
                    begin_price = price_list[bar]

                    pricing_list = []
                    xx = 0
                    for _ in range(context.feature_window):
                        price = price_list[bar-(context.feature_window-xx)]
                        pricing_list.append(price)
                        xx += 1

                    features = np.around(np.diff(pricing_list) / pricing_list[:-1] * 100.0, 1)


                    #print(features)

                    if end_price > begin_price:
                        label = 1
                    else:
                        label = -1

                    bar += 1
                    X.append(features)
                    y.append(label)

                except Exception as e:
                    bar += 1
                    print(('feature creation',str(e)))




            clf = RandomForestClassifier()

            last_prices = price_list[-context.feature_window:]
            current_features = np.around(np.diff(last_prices) / last_prices[:-1] * 100.0, 1)

            X.append(current_features)
            X = preprocessing.scale(X)

            current_features = X[-1]
            X = X[:-1]

            clf.fit(X,y)
            p = clf.predict(current_features)[0]

            print(('Prediction',p))
            if p == 1:
                order_target_percent(stock,0.11)
            elif p == -1:
                order_target_percent(stock,-0.11)            

        except Exception as e:
            print(str(e))
            
            
    record('ma1',ma1)
    record('ma2',ma2)
    record('Leverage',context.account.leverage)"""
There was a runtime error.

You're using a random forest which will give you different results each time if you don't set a seed. Try this one.

clf = RandomForestClassifier(random_state=33)

RandomForestClassifier is using different random numbers each time and so the model it fits and thus the trades you make are different each time. On a related note, your model is pretty fragile if different random seeds change things so much.

(On preview, what Georges said)

It should also be noted that sklearns random forest has extremely low default ensemble size (10 classifiers!) so that increases variability even more.