This is a simple ML demo I want to port to QSTK.

Dan

Clone Algorithm

57

Loading...

There was an error loading this backtest.

Backtest from [start date] to [end date] with [amount] initial capital

Cumulative performance:

Algorithm
Benchmark

Custom data:

Total Returns

--

Alpha

--

Beta

--

Sharpe

--

Sortino

--

Max Drawdown

--

Benchmark Returns

--

Volatility

--

Returns | 1 Month | 3 Month | 6 Month | 12 Month |

Alpha | 1 Month | 3 Month | 6 Month | 12 Month |

Beta | 1 Month | 3 Month | 6 Month | 12 Month |

Sharpe | 1 Month | 3 Month | 6 Month | 12 Month |

Sortino | 1 Month | 3 Month | 6 Month | 12 Month |

Volatility | 1 Month | 3 Month | 6 Month | 12 Month |

Max Drawdown | 1 Month | 3 Month | 6 Month | 12 Month |

# /a/ks/q10/spy255.py
# Predict short-term positions of SPY: train a KNN classifier on lagged hourly
# returns, feed its own past predictions back in as features for a second KNN,
# and size the position with a kelly-like multiplier on the predicted edge.
#
# NOTE(review): `symbol`, `order_target_percent`, and `record` are injected by
# the Quantopian/zipline runtime; this module is not importable standalone.

from sklearn import linear_model  # kept: present in original (currently unused)
from sklearn.neighbors import KNeighborsClassifier
from collections import deque
import numpy as np
import pandas as pd
from pandas import DataFrame as df
from pandas import Series


def initialize(context):
    """Quantopian entry point: set up per-ticker trackers and sizing knobs."""
    ct = context
    ct.is_len = 1400            # learn from this many hourly observations
    ct.tkr = symbol('SPY')
    # Per-ticker rolling history, keyed by ticker (newest values on the right).
    ct.rdt = {}                 # datetimes
    ct.rp = {}                  # prices
    ct.ip = {}                  # initial (stage-1) predictions
    ct.myeff = {}               # effectiveness of initial predictions
    ct.myg4 = {}                # last 4 gains from acting on stage-1 predictions
    ct.ipval = 0.5              # bootstrap stage-1 prediction value
    ct.npval = 0.5              # bootstrap stage-2 prediction value
    ct.kelly = 1.0
    ct.kelly_base = 1.0
    ct.kelly_x = 0.0


def handle_data(context, data):
    """Quantopian entry point, called per bar.

    Samples one price per hour (minute == 58); once `is_len` samples exist,
    fits stage-1 KNN on lagged returns, then stage-2 KNN on lagged returns
    plus stage-1's own track record, and orders a kelly-scaled position.
    """
    ct = context
    hc = 2  # feature lags are multiples of 2 hours
    for tkr in data:
        if tkr not in ct.rdt:  # fix: was `(tkr in ct.rdt) == False`
            # First sighting of this ticker: initialize its trackers.
            ct.rdt[tkr] = deque(maxlen=ct.is_len)
            ct.rp[tkr] = deque(maxlen=ct.is_len)
            ct.ip[tkr] = deque(maxlen=ct.is_len)
            ct.myeff[tkr] = deque(maxlen=ct.is_len)
            ct.myg4[tkr] = deque(maxlen=4)
            # Initialize the position too: just go long until we learn more.
            order_target_percent(tkr, 1.0)
        mytimes = ct.rdt[tkr]
        myprices = ct.rp[tkr]
        myip = ct.ip[tkr]      # collects stage-1 predictions
        myg4 = ct.myg4[tkr]    # gains from the last 4 stage-1 predictions
        myeff = ct.myeff[tkr]  # collects sum(myg4)
        if data[tkr].datetime.strftime('%M') == '58':
            # Once an hour: record datetime and price.
            mytimes.append(data[tkr].datetime)
            myprices.append(data[tkr].price)
            # Only learn/predict once the in-sample window is full.
            if len(myprices) == myprices.maxlen:
                # Sort prices by date.
                # fix: DataFrame.sort() was removed in pandas 0.20; use sort_values().
                datep_df = df({'pdate': list(mytimes), 'cp': list(myprices)})
                datep_df_sorted = datep_df.sort_values('pdate')
                cp = list(datep_df_sorted['cp'])       # current price
                lag1 = np.array(lagn(hc * 1, cp))      # price hc hours ago
                lag2 = np.array(lagn(hc * 2, cp))
                lag3 = np.array(lagn(hc * 3, cp))
                lag4 = np.array(lagn(hc * 4, cp))
                lag5 = np.array(lagn(hc * 5, cp))
                lag6 = np.array(lagn(hc * 6, cp))
                leadp = np.array(leadn(hc * 1, cp))    # price hc hours ahead
                cp = np.array(cp)
                n1g = (leadp - cp) / cp                # normalized forward gain
                # Feature frame: returns over the six lag horizons.
                bigx_df = df({'x1': (cp - lag1) / lag1})
                bigx_df['x2'] = (cp - lag2) / lag2
                bigx_df['x3'] = (cp - lag3) / lag3
                bigx_df['x4'] = (cp - lag4) / lag4
                bigx_df['x5'] = (cp - lag5) / lag5
                bigx_df['x6'] = (cp - lag6) / lag6
                yval = n1g > 0                         # label: did price rise?
                yval_is = yval[:-1]                    # in-sample labels
                # DataFrame -> numpy (replaces the reset_index().values[:, 1:] dance).
                bigx = bigx_df.values
                bigx_is = bigx[:-1]                    # in-sample rows
                bigx_oos = bigx[-1:]                   # out-of-sample row, kept 2-D
                # n_neighbors == all samples + distance weights: a distance-weighted
                # vote over the whole in-sample set.
                knn1 = KNeighborsClassifier(n_neighbors=len(yval_is),
                                            weights='distance')
                knn1.fit(bigx_is, yval_is)
                # fix: modern sklearn requires a 2-D input to predict_proba,
                # hence the bigx[-1:] slice above.
                ct.ipval = knn1.predict_proba(bigx_oos)[0, 1]
                # Save this prediction so we can learn from it later.
                myip.append(ct.ipval)
                # Save its effectiveness too: sign of the call times realized gain.
                myg = np.sign(ct.ipval - 0.5) * n1g[-1]
                myg4.append(myg)
                myeff.append(sum(myg4))
                if len(myip) == len(bigx_df):
                    # Enough stage-1 history: use it as features and predict again.
                    bigx_df['myip'] = list(myip)
                    bigx_df['myeff'] = list(myeff)
                    bigx2_df = bigx_df[['x1', 'x2', 'x3', 'x4', 'myip', 'myeff']]
                    bigx2 = bigx2_df.values
                    bigx2_is = bigx2[:-1]
                    bigx2_oos = bigx2[-1:]
                    knn2 = KNeighborsClassifier(n_neighbors=len(yval_is),
                                                weights='distance')
                    knn2.fit(bigx2_is, yval_is)
                    ct.npval = knn2.predict_proba(bigx2_oos)[0, 1]
                    # Long when npval > 0.5, short otherwise, magnified by a
                    # kelly-like factor: a prediction of 0.55 gives
                    # kelly_base + kelly_x * 0.05.
                    # NOTE(review): the source was whitespace-mangled; these three
                    # statements are placed inside the stage-2 branch because at
                    # the bootstrap npval of 0.5 they would otherwise flatten the
                    # initial long every hour — confirm against the original.
                    ct.kelly = np.sign(ct.npval - 0.5) * (
                        ct.kelly_base + ct.kelly_x * abs(ct.npval - 0.5))
                    order_target_percent(tkr, ct.kelly)
                    record(kelly=ct.kelly)


def leadn(n, lst):
    """Shift lst left by n positions, padding the right end with lst[-1]."""
    dq = deque(lst, maxlen=len(lst))
    for _ in range(n):  # fix: loop index was unused
        dq.append(lst[-1])
    return list(dq)


def lagn(n, lst):
    """Shift lst right by n positions, padding the left end with lst[0]."""
    dq = deque(lst, maxlen=len(lst))
    for _ in range(n):
        dq.appendleft(lst[0])
    return list(dq)