Back to Community
A trial of using machine learning method

I have been here for a long time, but only started writing something recently. Thanks to the people in Quantopian for creating such a good community.

Recently, I read the tutorial of applying machine learning method. Here is the link:

Tomas includes many fundamental factors, and I want to write something using technical data. Here is a simple and easy demo. It can be easily changed to other machine learning models, such as the tree models in sklearn. I hope it will help someone.

From the research environment, the factor learned by ridge regression has an IC around 0.007, and from the return analysis it looks like it will achieve positive returns. However, in the algorithm environment the performance is not good at all, no matter how I change the parameters and the rebalance frequency. It looks like slippage is one cause, but I don't understand why.

I have searched several posts on machine learning methods in the forum, but they don't perform well. I think it is a promising method, but so far I haven't found a good result.

Loading notebook preview...
1 response

Here is the backtest in algorithm environment. Monotone decreasing returns.

Clone Algorithm
Total Returns
Max Drawdown
Benchmark Returns
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
This is a template algorithm on Quantopian for you to adapt and fill in.
import numpy as np
import pandas as pd
from scipy.stats.mstats import winsorize

import sklearn
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor
from sklearn.ensemble import RandomForestClassifier
#from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import Ridge
from sklearn.preprocessing import Imputer, StandardScaler
from sklearn.svm import SVR

import quantopian.algorithm as algo
import quantopian.optimize as opt
from quantopian.pipeline import Pipeline
from quantopian.pipeline.classifiers.morningstar import Sector
# BUG FIX: the next two lines were truncated to "from import ..." in the
# original; module paths restored from the Quantopian Pipeline API.
from quantopian.pipeline.data import Fundamentals, factset
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.experimental import risk_loading_pipeline
from quantopian.pipeline.factors import CustomFactor, SimpleBeta, Returns
from quantopian.pipeline.filters import QTradableStocksUS, Q500US, Q1500US

def initialize(context):
    """Called once at the start of the algorithm.

    Schedules the daily rebalance and end-of-day recording, and attaches
    the factor pipeline.
    """
    # Rebalance every day, 1 hour after market open.
    # BUG FIX: the comments promised scheduling but no schedule_function
    # calls were present, so rebalance()/record_vars() never ran.
    algo.schedule_function(
        rebalance,
        algo.date_rules.every_day(),
        algo.time_rules.market_open(hours=1),
    )
    # Record tracking variables at the end of each day.
    algo.schedule_function(
        record_vars,
        algo.date_rules.every_day(),
        algo.time_rules.market_close(),
    )
    #set_commission(commission.PerShare(cost=0.00, min_trade_cost=0))
    #set_slippage(slippage.VolumeShareSlippage(volume_limit=500, price_impact=0))
    # Create our dynamic stock selector.
    algo.attach_pipeline(make_pipeline(), 'pipeline')

# Number of quantile buckets used to one-hot encode each raw factor value.
N_SLICE = 10
# Horizon (in trading days) for the forward returns the model is trained to
# predict. NOTE(review): this constant is referenced by LM._train and
# make_pipeline but its definition was missing from the source (NameError);
# value restored here -- confirm against the original notebook.
DAYS_FORWARD = 5

class LM(CustomFactor):
    """Cross-sectional ML factor.

    Fits a ridge regression mapping quantile-encoded technical features to
    forward returns, and emits the model's prediction for the latest day.

    inputs[0] must be the trailing-returns factor used as the training
    target; inputs[1:] are the raw feature factors.
    """

    def __init__(self, *args, **kwargs):
        CustomFactor.__init__(self, *args, **kwargs)
        self._reg = Ridge(alpha=0.1)
        self._trained = False  # becomes True after the first successful fit
        self._count = 0

    def _compute(self, *args, **kwargs):
        # Pure pass-through; kept for parity with the framework hook.
        ret = CustomFactor._compute(self, *args, **kwargs)
        return ret

    def _transform(self, value, fidx):
        """One-hot encode a (days x assets) feature matrix into N_SLICE
        quantile-membership indicator planes; NaNs are median-imputed.

        `fidx` (the feature's index) is currently unused; kept for
        interface stability.
        """
        nanmask = np.isnan(value)
        # Columns that are entirely NaN: fall back to the global median.
        idx_all_na = np.argwhere(np.all(nanmask, axis=0)).ravel()
        if len(idx_all_na) > 0:
            value[:, idx_all_na] = np.nanmedian(value)
        # Partially-NaN columns: impute with that column's median.
        idx_any_na = np.argwhere(np.any(nanmask, axis=0)).ravel()
        if len(idx_any_na) > 0:
            for idx in idx_any_na:
                value[nanmask[:, idx], idx] = np.nanmedian(value[:, idx])
        # BUG FIX: the original grid arange(N_SLICE+1)*100/(N_SLICE+1)
        # topped out below the 100th percentile, so the largest values fell
        # into no bucket. Use an even 0..100 grid instead.
        percent = np.linspace(0, 100, N_SLICE + 1)
        pct = np.percentile(value, percent)
        values = [np.logical_and(value >= pct[i], value < pct[i + 1])
                  for i in range(N_SLICE)]
        # Make the top bucket inclusive so the maximum value is encoded.
        values[-1] = np.logical_or(values[-1], value >= pct[-1])
        values = np.dstack(values)
        return values

    def _train(self, today, assets, returns, inputs):
        """Fit the ridge model on features aligned with forward returns.

        Row t of `inputs` is paired with the trailing return observed
        DAYS_FORWARD+1 days later, i.e. the return realized after t.
        """
        inputs = inputs[:-(DAYS_FORWARD + 1)]
        returns = returns[(DAYS_FORWARD + 1):]
        returns = returns.reshape(-1)
        inputs = inputs.reshape(-1, inputs.shape[-1])
        # Drop observations with unknown (NaN) target returns.
        mask = np.logical_not(np.isnan(returns)).ravel()
        returns = returns[mask]
        inputs = inputs[mask]
        # BUG FIX: the original prepared the training arrays but never fit
        # the model, leaving self._reg unfitted forever.
        if len(returns) > 0:
            self._reg.fit(inputs, returns)
            self._trained = True

    def compute(self, today, assets, out, returns, *inputs):
        # Encode each feature into quantile indicator planes and stack them
        # along the feature axis: (days, assets, n_features * N_SLICE).
        inputs = [self._transform(input_, idx) for idx, input_ in enumerate(inputs)]
        inputs = np.dstack(inputs)
        # BUG FIX: the original never called _train, so predict() below
        # would raise on an unfitted estimator.
        self._train(today, assets, returns, inputs)
        last_input = inputs[-1]
        if self._trained:
            out[:] = self._reg.predict(last_input)
        else:
            out[:] = np.nan

def make_pipeline():
    """Build the daily pipeline: multi-horizon trailing returns feed the
    ridge-regression factor `lm`, screened to the Q500US universe.

    Returns a Pipeline with columns: 'returns', 'lm', 'sector', 'open'.
    """
    # Fundamental factors -- currently unused by the model; kept for
    # experimentation (see the commented-out entries in `inputs` below).
    ROE = Fundamentals.roe
    assets_growth = Returns(inputs=[Fundamentals.total_assets], window_length=252)
    #leverage   =  factset.Fundamentals.debt.latest / factset.Fundamentals.assets.latest
    assets = Fundamentals.total_assets
    ROE_growth = Returns(inputs=[Fundamentals.roe], window_length=252)
    net_income_margin = Fundamentals.net_margin
    sector = Sector()
    # Technical features: trailing returns over multiple lookback horizons.
    DAYS_BACK = [1, 3, 5, 7, 10, 20, 60, 120, 250, 500]
    n_days_returns = [Returns(inputs=[USEquityPricing.close], window_length=lag + 1)
                      for lag in DAYS_BACK]
    # BUG FIX: the original had `inputs=[]`, which is invalid -- Returns
    # needs a price input; close prices restored.
    returns = Returns(inputs=[USEquityPricing.close], mask=Q500US(),
                      window_length=DAYS_FORWARD + 1)
    # First input is the training target; the rest are features.
    inputs = [returns, *n_days_returns]#,ROE,net_income_margin,assets_growth,assets,ROE_growth]
    lm = LM(inputs=inputs, window_length=100, mask=Q500US())
    df = dict()
    df['returns'] = returns
    df['lm'] = lm
    df['sector'] = sector
    # BUG FIX: this line was truncated ("df['open'] =") in the original;
    # latest open price restored -- confirm against the source notebook.
    df['open'] = USEquityPricing.open.latest
    screen = Q500US() & sector.notnull() & lm.notnull()
    pipeline = Pipeline(columns=df, screen=screen)
    return pipeline

def before_trading_start(context, data):
    """Called every day before market open.

    Pulls the day's pipeline output and caches the tradable universe.
    """
    # BUG FIX: the docstring had lost its triple quotes in the original,
    # leaving a bare (invalid) statement; restored.
    context.output = algo.pipeline_output('pipeline')

    # These are the securities that we are interested in trading each day.
    context.security_list = context.output.index

def rebalance(context, data):
    """Execute orders according to our schedule_function() timing.

    Selects the highest and lowest model predictions among tradable names
    (up to 100 each side) and hands them to the optimizer under
    gross-exposure, concentration, dollar-neutral and sector-neutral
    constraints.
    """
    predictions = context.output['lm']
    predictions = predictions.loc[data.can_trade(predictions.index)]
    n_long_short = min(100, len(predictions) // 2)
    # NOTE(review): the original pd.concat(...) call was truncated in
    # extraction; reconstructed as bottom/top selection by prediction --
    # confirm against the source post.
    predictions = predictions.sort_values()
    predictions_top_bottom = pd.concat([
        predictions.head(n_long_short),
        predictions.tail(n_long_short),
    ])
    objective = opt.MaximizeAlpha(predictions_top_bottom)
    constraint_gross_exposure = opt.MaxGrossExposure(1.1)
    constraint_concentration = opt.PositionConcentration.with_equal_bounds(-0.005, 0.005)
    market_neutral = opt.DollarNeutral()
    # NOTE(review): this call was also truncated; reconstructed from the
    # pipeline's sector column -- confirm the bounds against the source.
    sector_neutral = opt.NetGroupExposure.with_equal_bounds(
        labels=context.output['sector'],
        min=-0.0001,
        max=0.0001,
    )
    constraint = [constraint_gross_exposure, constraint_concentration,
                  market_neutral, sector_neutral]
    # BUG FIX: without this call the optimization result was never
    # submitted and no orders were ever placed.
    algo.order_optimal_portfolio(objective=objective, constraints=constraint)

def record_vars(context, data):
    """Plot variables at the end of each day.

    Currently a no-op placeholder; the docstring had lost its triple
    quotes in the original, leaving invalid syntax -- restored.
    """
    pass

def handle_data(context, data):
    """Called every minute.

    Currently a no-op placeholder; the docstring had lost its triple
    quotes in the original, leaving invalid syntax -- restored.
    """
    pass
There was a runtime error.