Back to Community
Machine Learning Models And Estimators

First off: THIS IS AN EDITED (albeit rather heavily) VERSION OF: Simple Machine Learning Example Mk II By: Gus Gordon

I decided that this needed its own post, since the algorithm no longer really resembles the original in terms of its methodology.

The main point of this post is to outline something I haven't seen anyone else outline:

If you want repeatable, consistent results, you MUST use:

context.model.n_estimators = 100 #or more; anything past 250 will drastically slow down the backtest and won't be too beneficial  

Attached is an algorithm that implements n_estimators and only trades SPXL and XIV.

For further increases in accuracy and consistency:

context.model.min_samples_leaf = 2 #or more  
Clone Algorithm
106
Loading...
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
#SPXL|XIV
#Author: Jacob S.
#Inspired by: An Intro To Machine Learning
#Purpose: Live Trading
#Date: 04/12/2017

from sklearn.ensemble import ExtraTreesRegressor
import numpy as np

def initialize(context):
    """Configure the model, bookkeeping state and the scheduled callbacks.

    The regressor is trained on ``context.security`` by ``create_model`` and
    its prediction drives the weekly ``trade`` -> ``sell`` -> ``buy`` sequence
    scheduled below.
    """
    set_commission(commission.PerShare(cost=0.0, min_trade_cost=0.0))
    # Security whose price/volume history feeds the model; the securities that
    # are actually traded are chosen inside trade().
    context.security = sid(3951)
    # Pass the hyper-parameters to the constructor instead of patching
    # attributes afterwards, and pin random_state: extra-trees draw random
    # splits, so without a seed two runs of the same backtest can differ.
    context.model = ExtraTreesRegressor(
        n_estimators=100,
        min_samples_leaf=1,
        random_state=0,
    )
    context.lookback = 5         # daily changes per feature window
    context.history_range = 15   # daily bars fetched for each refit
    context.max_leverage = [0]   # one-element list holding the peak leverage
    context.DayCounter = 0       # incremented by day_end()
    context.buy = []             # securities queued by trade() for buy()
    # Everything except day_end runs on the first trading day of the week.
    # create_model is scheduled relative to the close while trade is scheduled
    # relative to the open, so the very first trade() call sees a model that
    # has not been fitted yet.
    schedule_function(create_model, date_rules.week_start(), time_rules.market_close(minutes=10))
    schedule_function(trade, date_rules.week_start(), time_rules.market_open(minutes=1))
    schedule_function(sell, date_rules.week_start(), time_rules.market_open(minutes=5))
    schedule_function(buy, date_rules.week_start(), time_rules.market_open(minutes=10))
    schedule_function(day_end, date_rules.every_day(), time_rules.market_close())
    
def before_trading_start(context, data):
    """Print a row of '=' characters plus an extra newline as a log separator."""
    separator = '============================================================================================================='
    print(separator + '\n')

def create_model(context, data):
    """Refit ``context.model`` on sliding windows of recent daily changes.

    Each training sample consists of ``context.lookback`` consecutive price
    changes followed by the volume changes for the same days; its target is
    the price change immediately after that window.
    """
    bars = context.history_range
    window = context.lookback
    prices = data.history(context.security, 'price', bars, '1d').values
    volumes = data.history(context.security, 'volume', bars, '1d').values
    d_price = np.diff(prices).tolist()
    d_volume = np.diff(volumes).tolist()

    # bars prices give bars - 1 changes; the last usable window must leave
    # one change after it to serve as the target.
    starts = list(range(bars - window - 1))
    inputs = [d_price[s:s + window] + d_volume[s:s + window] for s in starts]
    targets = [d_price[s + window] for s in starts]
    context.model.fit(inputs, targets)

def trade(context, data):
    """Choose this week's holding from the model's next-change prediction.

    Resets ``context.buy`` and, once the model has been fitted, predicts the
    next daily price change of ``context.security`` from its last
    ``context.lookback`` price and volume changes:

    * prediction in [0.1, 0.4): queue sid(37514)
    * prediction below 0:       queue sid(40516)
    * anything else:            flatten both securities right away

    Queued securities are picked up by ``buy``; ``sell`` closes the rest.
    """
    context.buy = []
    # Check the list of fitted trees rather than the estimator's truthiness:
    # depending on the scikit-learn version, bool() of an unfitted ensemble
    # may raise instead of returning False. A missing (None) model is skipped
    # as well.
    if not getattr(context.model, 'estimators_', None):
        return

    bars = context.lookback + 1  # lookback changes need one extra price bar
    recent_prices = data.history(context.security, 'price', bars, '1d').values
    recent_volumes = data.history(context.security, 'volume', bars, '1d').values
    features = np.diff(recent_prices).tolist() + np.diff(recent_volumes).tolist()

    # predict() takes a 2-D (n_samples, n_features) input, so wrap the single
    # sample in a list and unwrap the single result into a scalar.
    prediction = context.model.predict([features])[0]
    record(Prediction=prediction)

    if 0.1 <= prediction < 0.4:
        context.buy.append(sid(37514))
    elif prediction < 0.0:
        context.buy.append(sid(40516))
    else:
        order_target_percent(sid(37514), 0.0)
        order_target_percent(sid(40516), 0.0)

def sell(context, data):
    """Close every held position that is not in the current ``context.buy`` list."""
    keep = context.buy
    for held in context.portfolio.positions:
        if held in keep:
            continue
        order_target_percent(held, 0)
            
def buy(context, data):
    """Target a 99% portfolio weight in each security queued in ``context.buy``."""
    target_weight = 0.99
    for pick in context.buy:
        order_target_percent(pick, target_weight)
        
def day_end(context, data):
    """Advance the day counter and print an end-of-day portfolio summary.

    The "Daily Return" row is the cumulative return (in percent) divided by
    the number of days counted so far.
    """
    def emit(label, value, suffix=''):
        # One summary row: fixed-width label, stringified value, optional unit.
        print(label + str(value) + suffix)

    context.DayCounter += 1
    average_daily_pct = (context.portfolio.returns * 100) / context.DayCounter
    open_positions = sum(1 for _ in context.portfolio.positions)

    print('| END OF DAY SUMMARY |')
    emit('Day:                            ', context.DayCounter)
    emit('Initial Portfolio Value:        $', context.portfolio.starting_cash)
    emit('Current Portfolio Value:        $', context.portfolio.portfolio_value)
    emit('Profit & Loss:                  $', context.portfolio.pnl)
    emit('Total Returns:                  ', context.portfolio.returns * 100, '%')
    emit(' Daily Return:                  ', average_daily_pct, '%')
    emit('Leverage:                       ', context.account.leverage * 100, '%')
    emit('Maximum      Leverage           ', context.max_leverage[-1] * 100, '%')
    emit('Positions:                      ', open_positions)
    emit('Initial Margin Requirement:     ', context.account.initial_margin_requirement)
    emit('Maintenance Margin Requirement: ', context.account.maintenance_margin_requirement)

def handle_data(context, data):
    """Track the peak account leverage and record both leverage series.

    ``context.max_leverage`` is a list whose last element is the highest
    leverage seen so far; it is updated in place.
    """
    leverage = context.account.leverage
    peak = context.max_leverage
    # Update the stored peak directly instead of removing/appending while
    # iterating over the same list, which only worked for a one-element list
    # and could lose the true peak otherwise.
    if not peak:
        peak.append(leverage)
    elif leverage > peak[-1]:
        peak[-1] = leverage

    record(Max_Leverage=context.max_leverage[-1])
    record(Leverage=context.account.leverage)
    
    
    
    
There was a runtime error.
8 responses

Data From Backtest

Loading notebook preview...

Hey Jacob, when you run this from, say, 12/01/2010, it has a massive drawdown of about 80%, but then it takes off. Is this first year a learning period, or does it just not perform well during 2010?

yeah, before 2012 this doesn't do well:

You may notice that this algorithm actually models Intel (INTC) and then trades a 3x SPY ETF and XIV. It also works well if you model SPY, but there are points where XIV has dropped drastically; these are avoided if you model INTC instead of SPY.

Hello Jacob,

Can you elaborate on why INTC was your choice for modelling?

Since I am not sure what some of the methods are doing it is not surprising that I am confused, however, there is one thing I am truly baffled by. Why, when I "Build Algorithm" the unmodified algo "n" times in a row, do I get "n" different results?

Is there a random function/method I am missing?

Hi Paul, most machine learning algorithms use random numbers. If you want to make your work reproducible, you need to set a seed for the random state. In sklearn you can use for example the following:
random_state = 0
where 0 is the seed

BTW, extratrees = randomized decision trees

I found that increasing the value of context.model.min_samples_leaf also improves predictability, as you state above.

My idea on how to use what you provided is to set up 4-10 diverse pairs of equities (preferably etfs) that trade inversely such as SPY & XIV and create predictors for both. Then, at the pair level, trigger buy/sell quantities of each based on their individual as well as combined "predictions."

If I can teach myself enough Python, I would also like to make it self-correcting: looking at the previous predictions and the performance that followed them, and then tweaking the predictor thresholds automatically.

For fun, I began to implement what I suggested above. Instead of just SPXL vs XIV, I included QQQ vs VIG.

The returns aren't as astronomical as the original, but there is a bit of diversification.

Since I do not know how the ExtraTreesRegressor works, and I'm not sure I would understand more than a high-level description, I wonder how the prediction decision points of 0.4, 0.1 and -0.0 were determined. The reasons I am interested are:

A) I have no idea whether they should be different for different pairs

B) I hate literal decision points that can't fluctuate based on data.

So the real question in my mind is whether the decision points can be adjusted, either by:

1) Looking back at historical data using built in functions.

2) Gathering predictions, and associated P&L per security, and tweaking it as we make mistakes & learn.

Clone Algorithm
17
Loading...
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
#SPXL|XIV
#Author: Jacob S.
#Inspired by: An Intro To Machine Learning
#Purpose: Live Trading
#Date: 04/12/2017

from sklearn.ensemble import ExtraTreesRegressor
import numpy as np

def initialize(context):
    """Configure four per-security models, state and the scheduled callbacks.

    One regressor is kept per traded security (``context.model1`` ..
    ``context.model4`` for ``context.security1`` .. ``context.security4``);
    ``create_models`` refits them and ``trade`` consumes their predictions.
    """
    min_sample_leaves = 10
    n_estimators = 150
    seed = 0
    set_commission(commission.PerShare(cost=0.0, min_trade_cost=0.0))
    # Securities 1/2 form the first pair and 3/4 the second pair in trade().
    # NOTE(review): presumably SPXL/XIV and QQQ/VIG as described in the
    # thread -- verify the sids.
    context.security1 = sid(37514)
    context.security2 = sid(40516)
    context.security3 = sid(19920)
    context.security4 = sid(28364)

    def new_model():
        # Hyper-parameters go through the constructor, and random_state is
        # pinned: extra-trees draw random splits, so without a seed two runs
        # of the same backtest can differ.
        return ExtraTreesRegressor(
            n_estimators=n_estimators,
            min_samples_leaf=min_sample_leaves,
            random_state=seed,
        )

    context.model1 = new_model()
    context.model2 = new_model()
    context.model3 = new_model()
    context.model4 = new_model()
    context.lookback = 5         # daily changes per feature window
    context.history_range = 15   # daily bars fetched for each refit
    # NOTE(review): with history_range=15 and lookback=5, buildModel produces
    # only 9 training samples, fewer than min_samples_leaf=10, so no split
    # can satisfy the leaf-size constraint -- confirm this is intended.
    context.max_leverage = [0]   # one-element list holding the peak leverage
    context.DayCounter = 0       # incremented by day_end()
    context.buy = []             # securities queued by trade() for buy()
    # Everything except day_end runs on the first trading day of the week.
    # create_models is scheduled relative to the close while trade is
    # scheduled relative to the open, so the very first trade() call sees
    # models that have not been fitted yet.
    schedule_function(create_models, date_rules.week_start(), time_rules.market_close(minutes=10))
    schedule_function(trade, date_rules.week_start(), time_rules.market_open(minutes=1))
    schedule_function(sell, date_rules.week_start(), time_rules.market_open(minutes=5))
    schedule_function(buy, date_rules.week_start(), time_rules.market_open(minutes=10))
    schedule_function(day_end, date_rules.every_day(), time_rules.market_close())
    
def before_trading_start(context, data):
    """Print a row of '=' characters plus an extra newline as a log separator."""
    separator = '============================================================================================================='
    print(separator + '\n')

def create_models(context, data):
    """Refit each of the four models on the history of its own security."""
    assignments = (
        ('model1', context.security1),
        ('model2', context.security2),
        ('model3', context.security3),
        ('model4', context.security4),
    )
    for attr, security in assignments:
        refit = buildModel(security, getattr(context, attr), context, data)
        setattr(context, attr, refit)
'''
    recent_prices = data.history(context.security, 'price', context.history_range, '1d').values
    recent_volumes = data.history(context.security, 'volume', context.history_range, '1d').values
    price_changes = np.diff(recent_prices).tolist()
    volume_changes = np.diff(recent_volumes).tolist()
    #INPUT
    X = []
    #OUTPUT
    Y = [] 
    for i in range(context.history_range-context.lookback-1):
        X.append(price_changes[i:i+context.lookback] + volume_changes[i:i+context.lookback]) 
        Y.append(price_changes[i+context.lookback]) 
    context.model.fit(X, Y) 
'''

def _in_buy_band(prediction):
    """Return True when ``prediction`` lies in the [0.1, 0.4) buy band."""
    return bool(0.1 <= prediction < 0.4)


def _select_pair(context, first, second, first_pred, second_pred):
    """Queue purchases for one security pair based on its two predictions.

    * either prediction in the buy band: queue each security whose own
      prediction is in the band;
    * otherwise, if either prediction is negative: queue ``second`` when
      ``first`` is predicted to fall;
    * otherwise: flatten both securities right away.
    """
    buy_first = _in_buy_band(first_pred)
    buy_second = _in_buy_band(second_pred)
    if buy_first or buy_second:
        if buy_first:
            context.buy.append(first)
        if buy_second:
            context.buy.append(second)
    elif first_pred < 0.0 or second_pred < 0.0:
        if first_pred < 0.0:
            context.buy.append(second)
        # NOTE(review): a negative prediction for `second` alone queues
        # nothing; the original re-tested the buy band for `first` here,
        # which can never be true in this branch -- confirm the intent.
    else:
        order_target_percent(first, 0.0)
        order_target_percent(second, 0.0)


def trade(context, data):
    """Choose this week's holdings from the four per-security predictions.

    Resets ``context.buy``; once the models have been fitted it predicts the
    next daily price change for each security and applies the pair rules of
    ``_select_pair`` to (security1, security2) and (security3, security4).
    Queued securities are picked up by ``buy``; ``sell`` closes the rest.
    """
    context.buy = []
    models = (context.model1, context.model2, context.model3, context.model4)
    # Check the lists of fitted trees rather than an estimator's truthiness:
    # depending on the scikit-learn version, bool() of an unfitted ensemble
    # may raise instead of returning False.
    if not all(getattr(model, 'estimators_', None) for model in models):
        return

    prediction1 = getPrediction(context.security1, context.model1, context, data)
    prediction2 = getPrediction(context.security2, context.model2, context, data)
    prediction3 = getPrediction(context.security3, context.model3, context, data)
    prediction4 = getPrediction(context.security4, context.model4, context, data)
    record(Prediction1=prediction1)
    record(Prediction2=prediction2)

    _select_pair(context, context.security1, context.security2,
                 prediction1, prediction2)
    # The second pair is driven by prediction3/prediction4 only; the original
    # tested prediction2 when deciding whether to queue security4.
    _select_pair(context, context.security3, context.security4,
                 prediction3, prediction4)

'''
        if (.4 > prediction >= 0.1) or (.4 > prediction2 >= 0.1) :
            S = sid(37514)
            context.buy.append(S)
            
        elif (prediction < -0.0) or (prediction2 < -0.0):
            S = sid(40516)
            context.buy.append(S)
        else:
            order_target_percent(sid(37514), 0.0)
            order_target_percent(sid(40516), 0.0)
'''
'''
        if .4 > prediction >= 0.1 :
            S = sid(37514)
            context.buy.append(S)
            
        elif prediction < -0.0:
            S = sid(40516)
            context.buy.append(S)
        else:
            order_target_percent(sid(37514), 0.0)
            order_target_percent(sid(40516), 0.0)
'''

def buildModel(security, model, context, data):
    """Fit ``model`` on sliding windows of ``security``'s daily changes.

    Each training sample consists of ``context.lookback`` consecutive price
    changes followed by the volume changes for the same days; its target is
    the price change immediately after that window.  Returns the same
    ``model`` object after fitting.
    """
    bars = context.history_range
    window = context.lookback
    prices = data.history(security, 'price', bars, '1d').values
    volumes = data.history(security, 'volume', bars, '1d').values
    d_price = np.diff(prices).tolist()
    d_volume = np.diff(volumes).tolist()

    # bars prices give bars - 1 changes; the last usable window must leave
    # one change after it to serve as the target.
    starts = list(range(bars - window - 1))
    inputs = [d_price[s:s + window] + d_volume[s:s + window] for s in starts]
    targets = [d_price[s + window] for s in starts]
    model.fit(inputs, targets)
    return model

def getPrediction(security, model, context, data):
    """Predict the next daily price change of ``security`` with ``model``.

    The feature vector is the last ``context.lookback`` daily price changes
    followed by the matching volume changes, i.e. the same layout used for
    training in ``buildModel``.

    Returns whatever ``model.predict`` returns for a single-row input (a
    one-element array for scikit-learn regressors).
    """
    bars = context.lookback + 1  # lookback changes need one extra price bar
    recent_prices = data.history(security, 'price', bars, '1d').values
    recent_volumes = data.history(security, 'volume', bars, '1d').values

    features = np.diff(recent_prices).tolist() + np.diff(recent_volumes).tolist()
    # predict() takes a 2-D (n_samples, n_features) input; passing the bare
    # 1-D feature list is rejected by current scikit-learn releases, so wrap
    # the single sample in a list.
    return model.predict([features])

def sell(context, data):
    """Close every held position that is not in the current ``context.buy`` list."""
    keep = context.buy
    for held in context.portfolio.positions:
        if held in keep:
            continue
        order_target_percent(held, 0)
            
def buy(context, data):
    """Split a 99% portfolio allocation equally across the queued securities."""
    picks = context.buy
    count = len(picks)
    if not count > 0.00:
        return
    weight = .99 / count
    for pick in picks:
        order_target_percent(pick, weight)
        
def day_end(context, data):
    """Advance the day counter and print an end-of-day portfolio summary.

    The "Daily Return" row is the cumulative return (in percent) divided by
    the number of days counted so far.
    """
    def emit(label, value, suffix=''):
        # One summary row: fixed-width label, stringified value, optional unit.
        print(label + str(value) + suffix)

    context.DayCounter += 1
    average_daily_pct = (context.portfolio.returns * 100) / context.DayCounter
    open_positions = sum(1 for _ in context.portfolio.positions)

    print('| END OF DAY SUMMARY |')
    emit('Day:                            ', context.DayCounter)
    emit('Initial Portfolio Value:        $', context.portfolio.starting_cash)
    emit('Current Portfolio Value:        $', context.portfolio.portfolio_value)
    emit('Profit & Loss:                  $', context.portfolio.pnl)
    emit('Total Returns:                  ', context.portfolio.returns * 100, '%')
    emit(' Daily Return:                  ', average_daily_pct, '%')
    emit('Leverage:                       ', context.account.leverage * 100, '%')
    emit('Maximum      Leverage           ', context.max_leverage[-1] * 100, '%')
    emit('Positions:                      ', open_positions)
    emit('Initial Margin Requirement:     ', context.account.initial_margin_requirement)
    emit('Maintenance Margin Requirement: ', context.account.maintenance_margin_requirement)

def handle_data(context, data):
    """Track the peak account leverage in ``context.max_leverage``.

    ``context.max_leverage`` is a list whose last element is the highest
    leverage seen so far; it is updated in place.
    """
    leverage = context.account.leverage
    peak = context.max_leverage
    # Update the stored peak directly instead of removing/appending while
    # iterating over the same list, which only worked for a one-element list
    # and could lose the true peak otherwise.
    if not peak:
        peak.append(leverage)
    elif leverage > peak[-1]:
        peak[-1] = leverage

#    record(Max_Leverage = context.max_leverage[-1])
#    record(Leverage = context.account.leverage)  
    
    
    
    
There was a runtime error.