A trial of using a machine learning method

I have been here for a long time, but only recently started to write. Thanks to everyone at Quantopian for creating such a good community.

Recently, I read the tutorial on applying machine learning methods. Here is the link: https://www.quantopian.com/posts/machine-learning-on-quantopian-part-3-building-an-algorithm

Tomas includes many fundamental factors, and I wanted to write something using technical data instead. Here is a simple demo. It can easily be changed to another machine learning model, such as a tree model in sklearn. I hope it helps someone.

In the research environment, the factor learned by ridge regression has an IC of around 0.007, and the return analysis suggests it should achieve positive returns. However, in the algorithm environment the performance is not good at all, no matter how I change the parameters or the rebalancing frequency. Slippage looks like one cause, but I don't understand why.

I have searched the forum for several posts on machine learning methods, but none of them perform well. I think it is a promising approach, but so far I have not found a good result.

5
1 response

Here is the backtest in the algorithm environment. The returns decrease monotonically.

3
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
 Returns 1 Month 3 Month 6 Month 12 Month
 Alpha 1 Month 3 Month 6 Month 12 Month
 Beta 1 Month 3 Month 6 Month 12 Month
 Sharpe 1 Month 3 Month 6 Month 12 Month
 Sortino 1 Month 3 Month 6 Month 12 Month
 Volatility 1 Month 3 Month 6 Month 12 Month
 Max Drawdown 1 Month 3 Month 6 Month 12 Month
"""
This is a template algorithm on Quantopian for you to adapt and fill in.
"""
import numpy as np
import pandas as pd
import sklearn
from scipy.stats.mstats import winsorize
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
#from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import Ridge
from sklearn.preprocessing import Imputer, StandardScaler
from sklearn.svm import SVR

import quantopian.algorithm as algo
import quantopian.optimize as opt
from quantopian.pipeline import Pipeline
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline.data import Fundamentals, factset
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, SimpleBeta, Returns
from quantopian.pipeline.filters import Q500US

def initialize(context):
    """
    One-time setup: register the scheduled callbacks and attach the pipeline.
    """
    # Trade every day, one hour into the session.
    trade_days = algo.date_rules.every_day()
    hour_after_open = algo.time_rules.market_open(hours=1)
    algo.schedule_function(rebalance, trade_days, hour_after_open)

    # Chart the tracking variables every day at the close.
    record_days = algo.date_rules.every_day()
    at_close = algo.time_rules.market_close()
    algo.schedule_function(record_vars, record_days, at_close)

    #set_slippage(slippage.VolumeShareSlippage(volume_limit=500, price_impact=0))
    # Register the stock selector under the name 'pipeline'.
    stock_selector = make_pipeline()
    algo.attach_pipeline(stock_selector, 'pipeline')

# Number of percentile buckets each feature is one-hot encoded into
# (used by LM._transform).
N_SLICE = 10
# Forward horizon of the regression target: sets the window of the target
# Returns factor (DAYS_FORWARD + 1) and the feature/target offset in LM._train.
DAYS_FORWARD = 1

class LM(CustomFactor):
    """
    Pipeline factor that refits a ridge regression on every ``compute`` call.

    The first pipeline input is the regression target (``returns``); every
    remaining input is a feature.  Each feature window is one-hot encoded
    into ``N_SLICE`` percentile buckets, the model is fitted with the features
    offset ``DAYS_FORWARD + 1`` rows behind the target, and the factor value
    is the model's prediction for the most recent row of features.
    """

    def __init__(self, *args, **kwargs):
        CustomFactor.__init__(self, *args, **kwargs)

        self._reg = Ridge(alpha=0.1)
        # Becomes True after the first successful fit; compute() refuses to
        # predict before that.
        self._trained = False
        self._count = 0

    def _compute(self, *args, **kwargs):
        # Plain pass-through to the base implementation.
        return CustomFactor._compute(self, *args, **kwargs)

    def _transform(self, value, fidx):
        """
        One-hot encode one feature window into percentile buckets.

        value: 2-D array indexed as [row, column]; missing entries are NaN.
        fidx:  position of the feature in the input list (currently unused).

        Returns a boolean array with one extra trailing axis of length
        ``N_SLICE``; entry ``[r, c, i]`` is True when ``value[r, c]`` lies in
        bucket ``i``.
        """
        # Work on a float copy so the caller's window is never modified.
        value = np.array(value, dtype=float)

        # NOTE(review): the NaN imputation below is a reconstruction -- the
        # code referenced idx_all_na / idx_any_na without ever defining them.
        # Columns with no data at all take the median of the whole window;
        # columns with gaps take their own median.  Confirm this is intended.
        idx_all_na = np.flatnonzero(np.isnan(value).all(axis=0))
        if len(idx_all_na) > 0:
            value[:, idx_all_na] = np.nanmedian(value)
        idx_any_na = np.flatnonzero(np.isnan(value).any(axis=0))
        for idx in idx_any_na:
            column = value[:, idx]  # view: the assignment below edits `value`
            column[np.isnan(column)] = np.nanmedian(column)

        # NOTE(review): the grid stops at 100 * N_SLICE / (N_SLICE + 1), so
        # values above that percentile fall into no bucket (all-False row).
        # Kept as is because changing it would change the fitted model.
        percent = np.arange(N_SLICE + 1) * 100 / (N_SLICE + 1)
        pct = np.percentile(value, percent)
        buckets = [
            np.logical_and(value >= low, value < high)
            for low, high in zip(pct[:-1], pct[1:])
        ]
        return np.dstack(buckets)

    def _train(self, today, assets, returns, inputs):
        """
        Fit the ridge model on the window.

        Features from row ``i`` are paired with the target from row
        ``i + DAYS_FORWARD + 1``.  Samples whose target is NaN or infinite are
        dropped; if none remain the previous fit (if any) is left in place.
        """
        inputs = inputs[:-(DAYS_FORWARD + 1)]
        returns = returns[(DAYS_FORWARD + 1):]

        # Flatten (row, column) into one sample axis.
        returns = returns.reshape(-1)
        inputs = inputs.reshape(-1, inputs.shape[-1])

        # Ridge.fit rejects non-finite targets, so filter them out first.
        valid = np.isfinite(returns)
        if not valid.any():
            return

        self._reg.fit(inputs[valid], returns[valid])
        self._trained = True

    def compute(self, today, assets, out, returns, *inputs):
        """
        Encode the features, refit the model, and write predictions to ``out``.

        ``out`` is filled with NaN when the model has never been fitted.
        """
        encoded = [
            self._transform(feature, idx) for idx, feature in enumerate(inputs)
        ]
        # Concatenate the per-feature buckets along the last axis.
        encoded = np.dstack(encoded)
        self._train(today, assets, returns, encoded)

        if not self._trained:
            out[:] = np.nan
            return

        # Predict from the most recent row of encoded features.
        out[:] = self._reg.predict(encoded[-1])

def make_pipeline(train_window=100):
    """
    Build the pipeline that produces the ridge-regression factor.

    train_window: number of rows handed to the LM factor on each compute.
        NOTE(review): the original construction of ``lm`` was missing, so the
        window length actually used is unknown; 100 is a placeholder --
        confirm before relying on results.

    Returns a Pipeline with columns 'returns', 'lm', 'sector' and 'open',
    screened to Q500US members with a known sector and a non-null prediction.
    """
    universe = Q500US()
    sector = Sector()

    # Trailing close-to-close returns over several lookbacks are the features.
    # Fundamental factors (e.g. ROE, net margin, asset growth) can be appended
    # to `inputs` below in the same way.
    lookbacks = [1, 3, 5, 7, 10, 20, 60, 120, 250, 500]
    n_days_returns = [
        Returns(inputs=[USEquityPricing.close], window_length=lag + 1)
        for lag in lookbacks
    ]

    # Open-to-open return over DAYS_FORWARD days is the regression target.
    returns = Returns(
        inputs=[USEquityPricing.open],
        mask=universe,
        window_length=DAYS_FORWARD + 1,
    )

    # Order matters: LM.compute takes the target first, then the features.
    inputs = [returns] + n_days_returns
    lm = LM(inputs=inputs, window_length=train_window, mask=universe)

    columns = {
        'returns': returns,
        'lm': lm,
        'sector': sector,
        'open': USEquityPricing.open.latest,
    }
    screen = universe & sector.notnull() & lm.notnull()
    return Pipeline(columns=columns, screen=screen)

"""
Called every day before market open.
"""
context.output = algo.pipeline_output('pipeline')

# These are the securities that we are interested in trading each day.
context.security_list = context.output.index

def rebalance(context, data):
    """
    Execute orders according to our schedule_function() timing.

    Takes the 'lm' predictions from ``context.output``, keeps the highest and
    lowest ranked names (up to 100 per side), and asks the optimizer for the
    alpha-maximising portfolio under gross-exposure, per-position,
    dollar-neutral and per-sector net-exposure constraints.
    """
    predictions = context.output['lm']

    # Never take more than half of the names per side, so the long and short
    # selections cannot overlap.
    n_long_short = min(100, len(predictions) // 2)

    predictions_top_bottom = pd.concat([
        predictions.nlargest(n_long_short),
        predictions.nsmallest(n_long_short),
    ])

    objective = opt.MaximizeAlpha(predictions_top_bottom)

    constraints = [
        opt.MaxGrossExposure(1.1),
        opt.PositionConcentration.with_equal_bounds(-0.005, 0.005),
        opt.DollarNeutral(),
        opt.NetGroupExposure.with_equal_bounds(
            labels=context.output['sector'].dropna(),
            min=-0.002,
            max=0.002,
        ),
    ]

    # Called through the imported `algo` namespace, like every other API call
    # in this file, instead of relying on an implicit global.
    algo.order_optimal_portfolio(objective=objective, constraints=constraints)

def record_vars(context, data):
    """
    End-of-day hook: chart how many positions the portfolio holds.
    """
    held = context.portfolio.positions
    algo.record(num_positions=len(held))

def handle_data(context, data):
    """
    Per-minute hook; intentionally does nothing.
    """
    return None
There was a runtime error.