Combine Different Machine Learning Methods

I have seen Gus Gordon's algorithm, which uses the machine-learning method Random Forest.
Simple Machine Learning Example

I have tried some other machine-learning methods, including SVM and AdaBoost. It turns out that SVM performs best in the same single-security case (Boeing). I also attempted to adjust the parameters involved in the algo, including the number of training samples and the length of the feature vector.

I think using only prices as features may not be enough, so I added the volumes to the feature vector.

Inspired by the idea of ensemble learning, which enhances the performance of weak classifiers, I combined three classifiers and gave them different voting weights. The weights of the classifiers are themselves parameters that could be learned further, but that would be a little more complicated.

Fortunately, the algo performs somewhat better than before. However, in my experiments it seems that the performance of the algo depends to a great extent on the security I choose, which means it's not as stable as the benchmark. Maybe a portfolio of more securities would be better.

This is the first time I have posted my idea. If anything is wrong, please tell me. Thanks!

# Use three machine learning methods. More here: http://scikit-learn.org/stable/user_guide.html
from sklearn import svm
from collections import deque
import numpy as np

def initialize(context):
    """Set up the traded security, the three classifiers, and the rolling
    data windows used to build features and labels.

    Called once by the platform before the backtest starts; ``context`` is
    the platform-provided state object shared with ``handle_data``.
    """
    # RandomForestClassifier is used below but never imported at the top of
    # the file (only `from sklearn import svm` is); import the ensemble
    # classifiers locally so the algorithm actually runs.
    from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier

    context.security = sid(698)    # Boeing
    context.window_length = 10     # Amount of prior bars to study

    # BUG FIX: the original defined only clf1 and clf3, but handle_data also
    # fits and predicts with context.clf2 -> attribute error at runtime.
    # The post mentions AdaBoost as the third method, so use it for clf2.
    context.clf1 = RandomForestClassifier(n_estimators=20)
    context.clf2 = AdaBoostClassifier()
    context.clf3 = svm.SVC()

    # deques are lists with a maximum length where old entries are shifted out.
    # window_length + 2 points are needed: diff() consumes one, and the most
    # recent change becomes the label.
    context.recent_prices = deque(maxlen=context.window_length + 2)   # recent prices
    context.recent_volumes = deque(maxlen=context.window_length + 2)  # recent volumes
    context.X = deque(maxlen=500)  # Independent, or input, variables
    context.Y = deque(maxlen=500)  # Dependent, or output, variable

    # Latest up/down prediction of each classifier (0 until models are trained).
    context.prediction1 = 0
    context.prediction2 = 0
    context.prediction3 = 0

def handle_data(context, data):
    """Per-bar callback: update price/volume history, (re)train the three
    classifiers once enough data has accumulated, and set a target position
    sized by their weighted up/down vote.

    Requires context.clf1/clf2/clf3 to have been created in initialize.
    """
    context.recent_prices.append(data[context.security].price)    # Update the recent prices
    context.recent_volumes.append(data[context.security].volume)  # Update the recent volumes

    # Enough recent data? window_length + 2 raw points yield window_length + 1
    # changes: window_length features plus one label.
    if len(context.recent_prices) == context.window_length + 2:
        # Boolean arrays: True when the value increased from the prior bar.
        price_changes = np.diff(context.recent_prices) > 0
        volume_changes = np.diff(context.recent_volumes) > 0
        feature = np.append(price_changes[:-1], volume_changes[:-1])

        context.X.append(feature)            # input: the prior changes
        context.Y.append(price_changes[-1])  # label: the final change

        if len(context.Y) >= 50:  # need enough data points to make a good model
            context.clf1.fit(context.X, context.Y)  # Generate model 1
            context.clf2.fit(context.X, context.Y)  # Generate model 2 (defined in initialize)
            context.clf3.fit(context.X, context.Y)  # Generate model 3

            # The latest changes, shifted one bar forward relative to the
            # training features, predict the *next* change.
            # BUG FIX: sklearn's predict() expects a 2-D array of shape
            # (n_samples, n_features); reshape the single sample and take
            # the scalar result instead of passing/keeping a 1-D array.
            target_feature = np.append(price_changes[1:],
                                       volume_changes[1:]).reshape(1, -1)
            context.prediction1 = context.clf1.predict(target_feature)[0]
            context.prediction2 = context.clf2.predict(target_feature)[0]
            context.prediction3 = context.clf3.predict(target_feature)[0]

            # Weighted vote of the three classifiers -> position percentage.
            position = 0
            if context.prediction1:
                position += 0.1
            if context.prediction2:
                position += 0.1
            if context.prediction3:
                position += 0.8
            log.info(position)
            order_target_percent(context.security, position)

    # NOTE(review): the flattened paste makes the original indentation of this
    # record() call ambiguous; recording every bar (predictions default to 0)
    # matches the style of the algorithm this post is based on — confirm.
    record(prediction1=int(context.prediction1),
           prediction2=int(context.prediction2),
           prediction3=int(context.prediction3))

2 responses

I did some backtests to tune the SVM parameters for a specific security (GLD); it can have lower volatility than a buy & hold strategy. FYI.

183
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
 Returns 1 Month 3 Month 6 Month 12 Month
 Alpha 1 Month 3 Month 6 Month 12 Month
 Beta 1 Month 3 Month 6 Month 12 Month
 Sharpe 1 Month 3 Month 6 Month 12 Month
 Sortino 1 Month 3 Month 6 Month 12 Month
 Volatility 1 Month 3 Month 6 Month 12 Month
 Max Drawdown 1 Month 3 Month 6 Month 12 Month
# Use three machine learning methods. More here: http://scikit-learn.org/stable/user_guide.html
from sklearn import svm
from collections import deque
import numpy as np

def initialize(context):
    """Set up GLD as both the traded security and the benchmark, the three
    classifiers, and the rolling windows for the four feature series.

    Called once by the platform before the backtest starts.
    """
    # RandomForestClassifier is used below but never imported at the top of
    # the file (only `from sklearn import svm` is); import the ensemble
    # classifiers locally so the algorithm actually runs.
    from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier

    set_benchmark(sid(26807))
    context.security = sid(26807)  # GLD

    context.window_length = 10  # Amount of prior bars to study

    # BUG FIX: handle_data fits and predicts with context.clf2, which the
    # original never defined — a likely cause of the reported runtime error.
    # Define all three classification algorithms.
    context.clf1 = RandomForestClassifier(n_estimators=20)
    context.clf2 = AdaBoostClassifier()
    context.clf3 = svm.SVC()

    # deques are lists with a maximum length where old entries are shifted out.
    context.recent_prices = deque(maxlen=context.window_length + 2)   # recent prices
    context.recent_volumes = deque(maxlen=context.window_length + 2)  # recent volumes
    context.recent_stddev = deque(maxlen=context.window_length + 2)   # recent 5-bar stddev
    context.recent_vmap = deque(maxlen=context.window_length + 2)     # recent 5-bar vwap
    context.X = deque(maxlen=50)  # Independent, or input, variables
    context.Y = deque(maxlen=50)  # Dependent, or output, variable

    # Latest up/down prediction of each classifier (0 until models are trained).
    context.prediction1 = 0
    context.prediction2 = 0
    context.prediction3 = 0

def handle_data(context, data):
    """Per-bar callback: update price/volume/stddev/vwap history, (re)train
    the three classifiers once 50 samples have accumulated, and trade GLD
    on their weighted up/down vote (all weight on clf3 here).

    Requires context.clf1/clf2/clf3 to have been created in initialize.
    """
    context.recent_prices.append(data[context.security].price)    # Update the recent prices
    context.recent_volumes.append(data[context.security].volume)  # Update the recent volumes

    # stddev(5) can be falsy (e.g. not yet available early in the backtest);
    # fall back to 0.0 so the deque lengths stay in sync.
    stddev = data[context.security].stddev(5)
    context.recent_stddev.append(stddev if stddev else 0.0)  # Update the recent stddevs

    context.recent_vmap.append(data[context.security].vwap(5))  # Update the recent vwaps

    # Enough recent data? window_length + 2 raw points yield window_length + 1
    # changes: window_length features plus one label.
    if len(context.recent_prices) == context.window_length + 2:
        # Boolean arrays: True when the value increased from the prior bar.
        price_changes = np.diff(context.recent_prices) > 0
        volume_changes = np.diff(context.recent_volumes) > 0
        stddev_changes = np.diff(context.recent_stddev) > 0
        vmap_changes = np.diff(context.recent_vmap) > 0
        # Concatenate all four change series (all but the label bar) into one
        # flat feature vector, in the same order as the original code.
        feature = np.concatenate([price_changes[:-1], vmap_changes[:-1],
                                  stddev_changes[:-1], volume_changes[:-1]])
        context.X.append(feature)            # input: the prior changes
        context.Y.append(price_changes[-1])  # label: the final change

        if len(context.Y) >= 50:  # need enough data points to make a good model
            context.clf1.fit(context.X, context.Y)  # Generate model 1
            context.clf2.fit(context.X, context.Y)  # Generate model 2 (defined in initialize)
            context.clf3.fit(context.X, context.Y)  # Generate model 3

            # The latest changes, shifted one bar forward relative to the
            # training features, predict the *next* change.
            # BUG FIX: sklearn's predict() expects a 2-D array of shape
            # (n_samples, n_features); reshape the single sample and take
            # the scalar result instead of passing/keeping a 1-D array.
            target_feature = np.concatenate([price_changes[1:], vmap_changes[1:],
                                             stddev_changes[1:],
                                             volume_changes[1:]]).reshape(1, -1)
            context.prediction1 = context.clf1.predict(target_feature)[0]
            context.prediction2 = context.clf2.predict(target_feature)[0]
            context.prediction3 = context.clf3.predict(target_feature)[0]

            # Weighted vote of the three classification algorithms -> position
            # percentage (this variant puts all of the weight on clf3).
            position = 0
            if context.prediction1:
                position += 0
            if context.prediction2:
                position += 0
            if context.prediction3:
                position += 1
            log.info(position)
            order_target_percent(context.security, position)

    # NOTE(review): the flattened paste makes the original indentation of these
    # record() calls ambiguous; recording every bar matches the first algo's
    # style — confirm.
    record(prediction1=int(context.prediction1),
           prediction2=int(context.prediction2),
           prediction3=int(context.prediction3))
    record(cash=context.portfolio.cash)
There was a runtime error.

Great post! I highly support ensemble learning. It would be interesting to add more features such as differently sized simple moving average windows, fundamental and technical indicators. And combine linear (e.g. Logistic Regression) and non-linear (e.g. Neural Networks, XGboost, Random Forest) algorithms to improve accuracy.