Perhaps some help interpreting what I've done

I was trying to work on an algo based on reading this posting from Michael Harris.

Of course, I have no idea what I'm doing really, so I don't want to cast a poor light on the mentioned technique. But as the formula is never stated, I was left poking around to achieve similar results.

When I look at the custom data in the backtest, there seems to be something intuitive about where the curIdx value is relative to the top and bottom bounds. However, my attempt at normalizing the data to find the percentage doesn't quite cut it when actually running the test.

So,
1) is it even reasonable to be able to select the buy/sell moments based on my custom data? Clearly it seems that if I could initiate a buy at the bottom of the curve and a sell at the top of the curve it should result in positive returns.

2) any ideas on a more realistic approach toward finding the "PSI" which Michael Harris refers to in his post?

27
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
 Returns 1 Month 3 Month 6 Month 12 Month
 Alpha 1 Month 3 Month 6 Month 12 Month
 Beta 1 Month 3 Month 6 Month 12 Month
 Sharpe 1 Month 3 Month 6 Month 12 Month
 Sortino 1 Month 3 Month 6 Month 12 Month
 Volatility 1 Month 3 Month 6 Month 12 Month
 Max Drawdown 1 Month 3 Month 6 Month 12 Month
from scipy import stats
import numpy as np

@batch_transform(window_length=21)
def get_prices(datapanel):
return {'high':datapanel['high'], 'low':datapanel['low']}

def initialize(context):
context.current_day = 0
context.initialized = False
context.openOrders = False
context.benchmark2_amount = {}
context.benchmark2_value = 0
set_commission(commission.PerShare(cost=.0035))

context.STOCKS      = [sid(8554)]

def handle_data(context, data):
if not context.initialized:
context.portfolio.positions = {}
for sid in context.STOCKS:
if sid in data:
context.benchmark2_amount[sid] = round(context.portfolio.cash / len(context.STOCKS) / data[sid]['price'])

context.initialized = True

if context.current_day != get_datetime().day:
context.current_day = get_datetime().day

if hasPendingOrders(context,data):
return

timeSeriesData = get_prices(data)
if timeSeriesData is None:
return

context.benchmark2_value = 0
for sid in context.STOCKS:
if sid in data:
high = {}
low = {}
context.benchmark2_value += (context.benchmark2_amount[sid] * data[sid]['price'])

line = np.arange(len(timeSeriesData['high'][sid]))
high['stats'] = stats.linregress(line,timeSeriesData['high'][sid])
low['stats'] = stats.linregress(line,timeSeriesData['low'][sid])

high['slope'] = high['stats']
low['slope'] = low['stats']

high['int'] = high['stats']
low['int'] = low['stats']

high['std'] = high['stats']
low['std'] = low['stats']

curPrice = data[sid]['price']
topBound = max(curPrice - high['int'],timeSeriesData['high'][sid].max() - high['int'])
botBound = min(curPrice - high['int'],timeSeriesData['low'][sid].min() - low['int'])
pct = (curPrice - high['int'] - botBound) / (topBound - botBound)
record(**{"pct":pct})
record(**{"top":topBound})
record(**{"bot":botBound})
record(**{"curIdx":data[sid]['price'] - high['int']})

price = data[sid]['price']
if pct <= 0.2 and context.portfolio.positions[sid].amount == 0:
amount = round(context.portfolio.cash/price)
log.info("BUY: %s \t%d \[email protected] %f"%(sid.symbol,int(amount),price))
order(sid,amount)
elif pct >= 0.8 and context.portfolio.positions[sid].amount > 0:
order(sid,-context.portfolio.positions[sid].amount)
log.info("SELL: %s \t%d \[email protected] %f"%(sid.symbol,int(context.portfolio.positions[sid].amount),price))
#record(**{"B2":context.benchmark2_value})
##---------------------------------------------------------------

###------------------------------------------------------
#   Determines wether or not there are any orders still processing
###------------------------------------------------------
def hasPendingOrders(context,data):
###- If orders were open, lets check to make sure they still are.
if context.openOrders == True:
open_orders = []
for item in context.STOCKS:
open_orders.append(bool(get_open_orders(item)))
if True in open_orders:
context.openOrders = True
return True
else:
context.openOrders = False
return False
else:
return False     
This backtest was created using an older version of the backtester. Please re-run this backtest to see results using the latest backtester. Learn more about the recent changes.
There was a runtime error.
10 responses

A slight variation, using the standard deviation rather than the intercept.

27
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
 Returns 1 Month 3 Month 6 Month 12 Month
 Alpha 1 Month 3 Month 6 Month 12 Month
 Beta 1 Month 3 Month 6 Month 12 Month
 Sharpe 1 Month 3 Month 6 Month 12 Month
 Sortino 1 Month 3 Month 6 Month 12 Month
 Volatility 1 Month 3 Month 6 Month 12 Month
 Max Drawdown 1 Month 3 Month 6 Month 12 Month
from scipy import stats
import numpy as np

@batch_transform(window_length=21)
def get_prices(datapanel):
return {'high':datapanel['high'], 'low':datapanel['low']}

def initialize(context):
context.current_day = 0
context.initialized = False
context.openOrders = False
context.benchmark2_amount = {}
context.benchmark2_value = 0
set_commission(commission.PerShare(cost=.0035))

context.STOCKS      = [sid(8554),sid(2174)]

def handle_data(context, data):
if not context.initialized:
context.portfolio.positions = {}
for sid in context.STOCKS:
if sid in data:
context.benchmark2_amount[sid] = round(context.portfolio.cash / len(context.STOCKS) / data[sid]['price'])

context.initialized = True

if context.current_day != get_datetime().day:
context.current_day = get_datetime().day

if hasPendingOrders(context,data):
return

timeSeriesData = get_prices(data)
if timeSeriesData is None:
return

context.benchmark2_value = 0
for sid in context.STOCKS:
if sid in data:
high = {}
low = {}
context.benchmark2_value += (context.benchmark2_amount[sid] * data[sid]['price'])

line = np.arange(len(timeSeriesData['high'][sid]))
high['stats'] = stats.linregress(line,timeSeriesData['high'][sid])
low['stats'] = stats.linregress(line,timeSeriesData['low'][sid])

high['int'] = high['stats']
low['int'] = low['stats']

high['std'] = high['stats']
low['std'] = low['stats']

curPrice = data[sid]['price']
topBound = max(curPrice + high['std'],timeSeriesData['high'][sid].max() - high['std'])
botBound = min(curPrice + high['std'],timeSeriesData['low'][sid].min() - low['std'])
pct = (curPrice + high['std'] - botBound) / (topBound - botBound)

price = data[sid]['price']
if pct <= 0.075 and context.portfolio.positions[sid].amount == 0:
amount = round(context.portfolio.cash/price)
log.info("BUY: %s \t%d \[email protected] %f"%(sid.symbol,int(amount),price))
order(sid,amount)
elif pct >= 0.5 and context.portfolio.positions[sid].amount > 0:
order(sid,-context.portfolio.positions[sid].amount)
log.info("SELL: %s \t%d \[email protected] %f"%(sid.symbol,int(context.portfolio.positions[sid].amount),price))

record(**{"B2":context.benchmark2_value})
##---------------------------------------------------------------

###------------------------------------------------------
#   Determines wether or not there are any orders still processing
###------------------------------------------------------
def hasPendingOrders(context,data):
###- If orders were open, lets check to make sure they still are.
if context.openOrders == True:
open_orders = []
for item in context.STOCKS:
open_orders.append(bool(get_open_orders(item)))
if True in open_orders:
context.openOrders = True
return True
else:
context.openOrders = False
return False
else:
return False     
This backtest was created using an older version of the backtester. Please re-run this backtest to see results using the latest backtester. Learn more about the recent changes.
There was a runtime error.

And just to verify, I ran my version against the same timeline in the PSI posting and the results are extremely similar. However, I can't seem to replicate the positive returns he claims to see.

4
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
 Returns 1 Month 3 Month 6 Month 12 Month
 Alpha 1 Month 3 Month 6 Month 12 Month
 Beta 1 Month 3 Month 6 Month 12 Month
 Sharpe 1 Month 3 Month 6 Month 12 Month
 Sortino 1 Month 3 Month 6 Month 12 Month
 Volatility 1 Month 3 Month 6 Month 12 Month
 Max Drawdown 1 Month 3 Month 6 Month 12 Month
from scipy import stats
import numpy as np

@batch_transform(window_length=30)
def get_prices(datapanel):

def initialize(context):
context.current_day = 0
context.initialized = False
context.openOrders = False
context.benchmark2_amount = {}
context.benchmark2_value = 0
set_commission(commission.PerShare(cost=.0035))

context.STOCKS      = [sid(8554)]

def handle_data(context, data):
if not context.initialized:
context.portfolio.positions = {}
for sid in context.STOCKS:
if sid in data:
context.benchmark2_amount[sid] = round(context.portfolio.cash / len(context.STOCKS) / data[sid]['price'])

context.initialized = True

if context.current_day != get_datetime().day:
context.current_day = get_datetime().day

if hasPendingOrders(context,data):
return

timeSeriesData = get_prices(data)
if timeSeriesData is None:
return

context.benchmark2_value = 0
for sid in context.STOCKS:
if sid in data:
high = {}
low = {}
context.benchmark2_value += (context.benchmark2_amount[sid] * data[sid]['price'])

high['stats'] = stats.linregress(line,timeSeriesData['high'][sid])
low['stats'] = stats.linregress(line,timeSeriesData['low'][sid])

high['slope'] = np.degrees(np.arctan(high['stats']))/90.0
low['slope'] = np.degrees(np.arctan(low['stats']))/90.0

high['int'] = high['stats']
low['int'] = low['stats']

high['std'] = high['stats']
low['std'] = low['stats']

curPrice = data[sid]['price']
topBound = max(curPrice + high['std'],timeSeriesData['high'][sid].max() - high['std'])
botBound = min(curPrice - low['std'],timeSeriesData['low'][sid].min() - low['std'])
pct = (curPrice + high['std'] - botBound) / (topBound - botBound)
record(**{"PCT":pct})

price = data[sid]['price']
if pct >= 0.95 and context.portfolio.positions[sid].amount == 0:
amount = round(context.portfolio.cash/price)
log.info("BUY: %s \t%d \[email protected] %f"%(sid.symbol,int(amount),price))
order(sid,amount)
elif pct <= 0.05 and context.portfolio.positions[sid].amount > 0:
order(sid,-context.portfolio.positions[sid].amount)
log.info("SELL: %s \t%d \[email protected] %f"%(sid.symbol,int(context.portfolio.positions[sid].amount),price))

#record(**{"B2":context.benchmark2_value})
##---------------------------------------------------------------

###------------------------------------------------------
#   Determines wether or not there are any orders still processing
###------------------------------------------------------
def hasPendingOrders(context,data):
###- If orders were open, lets check to make sure they still are.
if context.openOrders == True:
open_orders = []
for item in context.STOCKS:
open_orders.append(bool(get_open_orders(item)))
if True in open_orders:
context.openOrders = True
return True
else:
context.openOrders = False
return False
else:
return False     
This backtest was created using an older version of the backtester. Please re-run this backtest to see results using the latest backtester. Learn more about the recent changes.
There was a runtime error.

Hey, this is a cool idea. Can you give more insight/justification into why you chose to compute the upper/lower bounds and percentage the way you did?Reading Harris' post, I don't think he gives an explicit method for computing the probability of rising above the highest close of N bars. Is there an intuitive reason to use the intercept of the regression rather than standard error (stats.linregress returns standard error, not standard deviation)?

            topBound = max(curPrice + high['std'],timeSeriesData['high'][sid].max() - high['std'])
botBound = min(curPrice - low['std'],timeSeriesData['low'][sid].min() - low['std'])
pct = (curPrice + high['std'] - botBound) / (topBound - botBound)


I'm afraid I don't have any insight/justification for why I picked what I did, other than it just sort of seemed like the sort of number I needed to create the bound. And, even more embarrassing, I didn't realize it was standard error, and just assumed it was standard deviation. I'm not really familiar enough with the difference between the two to really make an educated choice between the two (I believe SD somehow takes into account the sample size?). Regardless, the results seem to be in line with the results (of the index at least) presented in the posting.

My thinking was basically this...
Over the course of n days I want to consider the max value of the high prices. At any given moment if I add the amount of potential error or variability to the current price I should get a value that is approaching or breaking through the max high value. I reversed the though process to come up with the bottom bound and found that this neatly has the current price fluctuating between the top and bottom. It also looked like the data presented in the post so I figured if it looked like a duck and sounded like a duck...
Now my hope is to have it considered by smarter people than myself so that I might better understand what's going on. ;)

The standard error, not the standard deviation, takes into account the sample size. See http://en.wikipedia.org/wiki/Standard_error. Also, scipy.stats.lingress returns the standard error of the parameter estimate, not the standard error or standard deviation of the prices themselves.

If you want to compute the variability in the price, probably the best idea is to compute the standard deviation of returns in percent terms (i.e. the volatility).

I am not sure linear regression is going to be the best approach here. You may want to consider more backtesting and research into what market conditions (current price relative to past prices, volatility levels, correlation with other indices, etc.) to justify when you would expect to see an increase in the price of the index.

Here's what I did (spoiler alert - I couldn't make it work). My work is in Excel. First, I tried to figure out his indicator. He writes "The value of the Probability State Indicator™ is the state of probability of price action in the sense of a rise above the highest close of N bars, in this case 30." He doesn't mention how long (days) over which he is calculating the probability of rising above the highest close. The probability of hitting a new high in 1 day is different than 30. In Excel I have written some functions that use a lognormal distribution (same as Black-Scholes option pricing model) which I can use to calculate the probability of a return given a mean, standard deviation and time horizon. So I played around with some of the parameters until I came close to a 59% probability on 9/25/13 which is a data point he gives. My chart of probabilities resembles the shape of his, but my high values rarely get above 80%. I'm fine with that. A 100% probability - which happens pretty frequently in his chart - means that it's certain (and we're not talking about death and taxes) the market will go up. That strikes me a wrong for periods over which traders operate. Still I continued. Then I tested thresholds at which to buy and sell the SPY from 12/31/1993 to 1/17/2014. No threshold surpassed buy and hold (but in my simple test I earned 0 when I wasn't in the market).

Good luck.

At least glad to see that others are finding the same results I am. I have reached out to Mr. Harris to ask if he would further elaborate. If he gets back to me then I'll report back.

By fiddling with the threshholds I can usually find a value that is at least positive, though rarely beating the benchmark. However, the numbers I find don't generally work for an entirely different timeline, so I'm not yet won over to his mehod. At the very least, however, it's provided for some interesting learning for me.

Mr. Harris was kind enough to get back to me. Here is what he had to say...

"Backtesting results are difficult to recreate unless the system logic is exact. I used a proprietary algorithm from stochastic processes for that system and the objective was to show that systems that work across many different markets do exist. I never expect a system to match the buy and hold return of a bull market but I expect it to do better during bear or sideways markets to make up for the underperformance."

Anony Mole (and others), yes I have some VBA functions I am happy to share (remember me when you make a fortune trading). Here are the ones somewhat related to this problem. Note these function use an ARITHMETIC average return, not a GEOMETRIC. If you need an explanation of the difference or why the arithmetic is used here, let me know.

Option Explicit
'Modified 3/25/98

Function AA_Wealth(vProb As Single, vReturn As Single, vRisk As Single, vTime As Single, Optional vAmount)
'Calculates the wealth for which there is only a vProb chance of doing worse 'than over vTime years given a mean of vMean and a standard deviation of 'vSD. Specify inputs in percentage formats (e.g. 8%=8, not 0.08). Dim vMean As Single
Dim vSD As Single
Dim vLNER As Single, vLNSD As Single, vLNVAR As Single
If IsMissing(vAmount) Then vAmount = 1
vMean = 1 + vReturn / 100
vSD = vRisk / 100
vLNSD = Sqr(Log(1 + (vSD / vMean) ^ 2)) 'var
vLNER = Log(vMean) - vLNSD ^ 2 / 2
AA_Wealth = vAmount * Exp(Application.NormInv(1 - vProb, vLNER * vTime, vLNSD * Sqr(vTime)))
End Function

Function AA_Return(vProb As Single, vMean As Single, vSD As Single, vTime As Single)
'Calculates the return for which there is only a vProb chance of doing worse 'than over vTime years given a mean of vMean and a standard deviation of 'vSD. Periods over 1 year are annualized. Specify inputs in percentage formats (e.g. 8%=8, not 0.08). Dim x
x = AA_Wealth(vProb, vMean, vSD, vTime, 1)
If vTime < 1 Then
AA_Return = (x - 1) * 100
Else
AA_Return = (x ^ (1 / vTime) - 1) * 100
End If
End Function

Function AA_Prob(vPReturn As Single, vReturn As Single, vRisk As Single, vTime As Single)
'Returns the probability of doing at least as well as vReturn over vTime '=NORMDIST(LN((vReturn/100+1)^vTime),vLNER*T\$13,vLNSD*SQRT(vTime),TRUE) Dim vMean As Single
Dim vSD As Single
vMean = 1 + vReturn / 100
vSD = vRisk / 100
Dim vLNER As Single, vLNSD As Single, vLNVAR As Single
vLNSD = Sqr(Log(1 + (vSD / vMean) ^ 2)) 'var
vLNER = Log(vMean) - vLNSD ^ 2 / 2
AA_Prob = 1 - Application.WorksheetFunction.NormDist(Log((vPReturn / 100 + 1) ^ vTime), vLNER * vTime, vLNSD * Sqr(vTime), True)
End Function

Function AA_Rand(vReturn As Single, vRisk As Single, vTime As Single, Optional vSeed)
'Returns a lognormally distributed random variable with a mean of vMean and a standard deviation of 'vSD. Periods over 1 year are annualized. Specify inputs in percentage formats (e.g. 8%=8, not 0.08). Application.Volatile
If IsMissing(vSeed) Or IsNull(vSeed) Then vSeed = 1
Dim vLNER As Single, vLNSD As Single, vLNVAR As Single, dblRnd As Double
Dim vMean As Single
Dim vSD As Single
vMean = 1 + vReturn / 100
vSD = vRisk / 100
vLNSD = Sqr(Log(1 + (vSD / vMean) ^ 2)) 'var
vLNER = Log(vMean) - vLNSD ^ 2 / 2
Do
dblRnd = Rnd(vSeed)
Loop Until (dblRnd > 0 And dblRnd < 1)
AA_Rand = 100 * (Exp(Application.WorksheetFunction.NormInv(dblRnd, vLNER * vTime, vLNSD * Sqr(vTime)) / vTime) - 1)
If vTime >= 1 Then Exit Function
AA_Rand = 100 * ((1 + AA_Rand / 100) ^ vTime - 1)
End Function

I use AA_Rand for do Monte Carlo simulations in Excel. To help me learn Python, I've put a couple of these into Python below:
import random
import array
import math
import numpy as np
from scipy import stats

# various functions related to asset allocation and the generation of random returns

def aa_wealth(prob, ror, risk, t=1.0, pv=1.0):
#Calculates the wealth for which there is only a vProb chance of doing worse
#than over vTime years given a mean of vMean and a standard deviation of
#vSD. Specify inputs in percentage formats (e.g. 8%=8, not 0.08).
vmean=1.0+ror/100.0
vrisk=risk/100
vlnsd=math.sqrt(math.log(1+(vrisk/vmean)**2))
vlner=math.log(vmean)-vrisk**2/2
return pv*math.exp(vlner*t+normsinv(1-prob)*vlnsd*math.sqrt(t))

def aa_return(prob, ror, risk,t,n=1):
#Calculates the return for which there is only a vProb chance of doing worse
#than over vTime years given a mean of vMean and a standard deviation of
#vSD. Periods over 1 year are annualized. Specify inputs in percentage formats (e.g. 8%=8, not 0.08).
x=aa_wealth(prob,ror,risk,t,1.0)
if t1:
result=np.exp((vlner*t+np.random.standard_normal(n)*vlnsd*np.sqrt(t))/t)**t-1
else:
result=np.exp((vlner*t+np.random.standard_normal(n)*vlnsd*np.sqrt(t))/t)-1
return result

(the indentation was lost in the copy/paste process). I could also send the add-in or a macro enabled workbook. My email is my first name, rex, @ last name which is macey with a .US domain (in case there are robots lurking). I'm traveling this week but will try to get it out. I have other functions as well including option pricing (and greeks) and some performance measurement. Caveat: use at your own risk.