Back to Community
SPY Intraday return prediction with Random Forest

Hello Quantopians, I am realtively new to Quantopian and try to build my first algo. I am trying to fetch intraday return data of several equities, feed this data to a random forest classifier and let it make predictions on whether the SPY is going to yield negative or positive returns. Based on those predictions I want to long/short the SPY. However, I always get a lot of errors. Apparently my data.history function gets data of different time-periods depending on the equity chosen. Of course I need always the same time period for the equities to be able to feed them as features to the random forest. Does anyone know how to do this? Furthermore, I get the error that my chosen assets cannot all be traded on the same day. Again I must do sth. wrong because I only want to trade the SPY. All the data on other assets are just fetched to build the features for the random forest. Any advice?

Thank you so much in advance

Clone Algorithm
9
Loading...
Backtest from to with initial capital
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import pandas as pd
def initialize(context):
    context.spy = sid(8554) # Trade SPY
    context.trainassets = [sid(8554), sid(36041),sid(14520),sid(43549),sid(36486),sid(26703),sid(14529),sid(26432),sid(21519),sid(26807),sid(40007),sid(19654),sid(19656),sid(39167),sid(23870),sid(23921),sid(43550),sid(1210774269088838)] #Assets used for training
    context.model = RandomForestClassifier()

    context.lookback = 1 # Look back 3 days
    context.history_range = 800 

    # Generate a new model every week
    schedule_function(create_model, date_rules.week_end(), time_rules.market_close(minutes=10))

    # Trade at the start of every day
   
    schedule_function(trade, date_rules.every_day(), time_rules.market_open(minutes=1))

def create_model(context, data):
    # Get the relevant daily open, close, volume, high, and low data
    recent_prices = data.history(context.trainassets,['open','close','high','low','volume'],context.history_range, '1d')
    spy = data.history(context.spy,['open','close'],context.history_range, '1d')

    #shift function shifts index
    time_lags = pd.DataFrame(index=recent_prices.index)
    time_lags['open']=recent_prices.open
    time_lags['close']=recent_prices.close
    time_lags['intraday_returns']=time_lags['close']/time_lags['open']-1
    time_lags['intraday_returns']=time_lags['intraday_returns'].fillna(0.0001)
    time_lags['lag1'] = (time_lags['intraday_returns'].shift(1)).fillna(0.0001)
    time_lags['lag2'] = (time_lags['intraday_returns'].shift(2)).fillna(0.0001)
    
    time_lags['spy_intraday_return'] = spy.close/spy.open-1
    classes= []
    for every in time_lags['spy_intraday_return']:
        if every > 0.005:
            x=0
        elif every <= 0.005 and every > 0:
            x=1
        elif every <= 0 and every > -0.005:
            x=2   
        else:
            x=3
        classes.append(x)
    classes = np.array(classes)
    time_lags['class'] = classes
    
    
    
    X = time_lags[['lag1','lag2']] # Independent, or input variables
    Y = time_lags['class'] # Dependent, or output variable
    context.model.fit(X, Y) # Generate our model

def trade(context, data):
    if context.model: # Check if our model is generated
        
        # Get recent prices
        new_recent_prices = data.history(context.trainassets,['open','close','high','low'], context.lookback, '1d')
        
        time_lags = pd.DataFrame(index=new_recent_prices.index)
        time_lags['open']=new_recent_prices.open
        time_lags['close']=new_recent_prices.close
        
        time_lags['intraday_returns']=time_lags['close']/time_lags['open']-1
        time_lags['intraday_returns']=time_lags['intraday_returns'].fillna(0.0001)
        time_lags['lag1'] = (time_lags['intraday_returns'].shift(1)).fillna(0.0001)
        time_lags['lag2'] = (time_lags['intraday_returns'].shift(2)).fillna(0.0001)
       
        
        
        X = time_lags[['lag1','lag2']]
        prediction = context.model.predict(X)
        if prediction ==0:
            order_target_percent(context.spy, 1.0)
        elif prediction == 1:
            order_target_percent(context.spy, 1.0)
        elif prediction == 2:
            order_target_percent(context.spy, 0)
        else:
            order_target_percent(context.spy, -1.0)

def handle_data(context, data):
    pass
There was a runtime error.