A very simple 1.25 Sharpe algorithm

Method:
1. Take related stocks (use clustering or simply use a sector)
2. Remove PC 1 factor
3. Sum the residuals and build 2 signals.

670
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
 Returns 1 Month 3 Month 6 Month 12 Month
 Alpha 1 Month 3 Month 6 Month 12 Month
 Beta 1 Month 3 Month 6 Month 12 Month
 Sharpe 1 Month 3 Month 6 Month 12 Month
 Sortino 1 Month 3 Month 6 Month 12 Month
 Volatility 1 Month 3 Month 6 Month 12 Month
 Max Drawdown 1 Month 3 Month 6 Month 12 Month
import math
import numpy as np
import pandas as pd
import scipy as sp
import cvxpy as cvx
from sklearn.covariance import OAS
from sklearn.decomposition import PCA
import statsmodels.api as smapi

from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.algorithm import attach_pipeline, pipeline_output
from quantopian.pipeline.filters.morningstar import Q500US
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline.factors.morningstar import MarketCap
from quantopian.pipeline.data import morningstar as mstar

def make_pipeline():
    """Build a pipeline of Q500US technology-sector stocks priced at $15+.

    Sector code 309 is Morningstar's Technology sector.
    """
    liquid_enough = USEquityPricing.close.latest >= 15
    tech_only = Sector().eq(309)
    return Pipeline(screen=Q500US() & liquid_enough & tech_only)

def initialize(context):
    """Set up the hedge instrument, rebalance schedule, and universe pipeline."""
    context.Hedge = sid(8554)  # SPY
    context.counter = 90       # start saturated so the first scheduled run trades

    # NOTE(review): the first line of this schedule_function call was missing
    # from the source; only the rule arguments survived. `trade` is the
    # reconstructed name of the scheduled trading function -- confirm against
    # the original algorithm.
    schedule_function(trade,
                      date_rules.every_day(),
                      time_rules.market_open(minutes=10))

    schedule_function(update_chart,
                      date_rules.every_day(),
                      time_rules.market_close(minutes=1))

    attach_pipeline(make_pipeline(), "Q500")

def handle_data(context, data):
    # Intentionally a no-op: all trading logic runs via scheduled functions.
    pass

def trade(context, data):
    """Rebalance every 90 trading days: remove the first principal component
    from sector returns and trade the residual mean-reversion signal.

    NOTE(review): the `def` line and the line defining `model` were missing
    from the source; both are reconstructed here (an OLS of returns on the
    PCA factor, which matches the later uses of `model.params` and
    `model.resid`). Confirm against the original algorithm.
    """
    # Throttle: only act once every 90 invocations.
    if context.counter < 90:
        context.counter += 1
        return
    context.counter = 0

    context.output = pipeline_output("Q500")
    context.indices = context.output.index

    # 90 days of prices -> 89 daily log returns per stock.
    prices = data.history(context.indices, "price", 90, "1d").dropna(axis=1)
    log_prices = np.log(prices.values)
    returns = np.diff(log_prices, axis=0)

    # First principal component = the common (sector-wide) factor.
    factors = PCA(1).fit_transform(returns)

    # Regress each stock's returns on the factor; residuals are the
    # stock-specific moves. (Reconstructed -- `model` was undefined.)
    model = smapi.OLS(returns, smapi.add_constant(factors)).fit()
    betas = model.params.T[:, 1:]  # per-stock factor loadings, intercept dropped

    # Signal: recent 2-day residual move versus the 20-day baseline.
    R = sp.stats.zscore(model.resid[-2:, :].sum(axis=0)) - sp.stats.zscore(model.resid[-20:, :].sum(axis=0))
    weights = getW(R, betas)

    # Normalize to unit gross exposure; guard against an all-zero solution.
    denom = np.sum(np.abs(weights))
    if denom == 0:
        denom = 1.
    weights = weights / denom

    # Close positions that dropped out of the tradable universe.
    # (Loop variable renamed from `sid`, which shadowed the platform builtin.)
    for asset in context.portfolio.positions:
        if asset not in prices.columns:
            order_target(asset, 0)

    for i, asset in enumerate(prices.columns):
        order_target_percent(asset, weights[i])

def getW(signal, betas):
    """Solve for weights maximizing exposure to `signal`, subject to
    near-dollar-neutrality, unit gross leverage, per-name caps, and
    near-zero exposure to each extracted factor.

    Parameters
    ----------
    signal : (m,) array of per-stock alpha scores.
    betas  : (m, n) array of factor loadings (m stocks, n factors).

    Returns
    -------
    (m,) ndarray of weights; all zeros when the solver fails.
    """
    m, n = betas.shape
    x = cvx.Variable(m)
    objective = cvx.Maximize(signal.T * x)

    constraints = [
        cvx.abs(sum(x)) <= 0.001,  # ~dollar neutral (was a strict `<`, which
                                   # modern cvxpy rejects; old cvxpy treated
                                   # `<` as `<=` anyway)
        sum(cvx.abs(x)) <= 1,      # gross leverage cap
        x <= 3.5 / m,              # per-name long cap
        -x <= 3.5 / m,             # per-name short cap
    ]

    # Neutralize exposure to each factor.
    for i in range(n):
        constraints.append(cvx.abs(betas[:, i].T * x) <= 0.001)

    prob = cvx.Problem(objective, constraints)
    prob.solve(solver=cvx.CVXOPT)

    if prob.status != 'optimal':  # was Python-2-only `<>`
        print(prob.status)
        return np.asarray([0.] * m)

    return np.asarray(x.value).flatten()

def update_chart(context, data):
    """Record account leverage and the current long/short position counts."""
    record(leverage=context.account.leverage)

    longs = shorts = 0

    # `.values()` replaces Python-2-only `.itervalues()`.
    for position in context.portfolio.positions.values():
        if position.amount > 0:
            longs += 1
        elif position.amount < 0:
            shorts += 1

    record(l=longs, s=shorts)
There was a runtime error.
6 responses

Same algorithm on industry group instead of entire sector.

670
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
 Returns 1 Month 3 Month 6 Month 12 Month
 Alpha 1 Month 3 Month 6 Month 12 Month
 Beta 1 Month 3 Month 6 Month 12 Month
 Sharpe 1 Month 3 Month 6 Month 12 Month
 Sortino 1 Month 3 Month 6 Month 12 Month
 Volatility 1 Month 3 Month 6 Month 12 Month
 Max Drawdown 1 Month 3 Month 6 Month 12 Month
import math
import numpy as np
import pandas as pd
import scipy as sp
import cvxpy as cvx
from sklearn.covariance import OAS
from sklearn.decomposition import PCA
import statsmodels.api as smapi

from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.algorithm import attach_pipeline, pipeline_output
from quantopian.pipeline.filters.morningstar import Q500US
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline.factors.morningstar import MarketCap
from quantopian.pipeline.data import morningstar as mstar

def make_pipeline():
    """Q500US technology stocks priced at $15+, with their Morningstar
    industry group code exposed as column `ind`.

    NOTE(review): downstream code filters on `context.output.ind`
    (industry group 30947), so the pipeline must expose that column; it was
    missing from the source and is reconstructed here -- confirm the field.
    """
    price_filter = USEquityPricing.close.latest >= 15
    ind = mstar.asset_classification.morningstar_industry_group_code.latest
    return Pipeline(
        columns={"ind": ind},
        screen=Q500US() & price_filter & Sector().eq(309),
    )

def initialize(context):
    """Set up the hedge instrument, rebalance schedule, and universe pipeline."""
    context.Hedge = sid(8554)  # SPY
    context.counter = 90       # start saturated so the first scheduled run trades

    # NOTE(review): the first line of this schedule_function call was missing
    # from the source; only the rule arguments survived. `trade` is the
    # reconstructed name of the scheduled trading function -- confirm against
    # the original algorithm.
    schedule_function(trade,
                      date_rules.every_day(),
                      time_rules.market_open(minutes=10))

    schedule_function(update_chart,
                      date_rules.every_day(),
                      time_rules.market_close(minutes=1))

    attach_pipeline(make_pipeline(), "Q500")

def handle_data(context, data):
    # Intentionally a no-op: all trading logic runs via scheduled functions.
    pass

def trade(context, data):
    """Rebalance every 90 trading days within a single industry group:
    remove the first principal component from returns and trade the
    residual mean-reversion signal.

    NOTE(review): the `def` line and the line defining `model` were missing
    from the source; both are reconstructed here (an OLS of returns on the
    PCA factor, which matches the later uses of `model.params` and
    `model.resid`). Confirm against the original algorithm.
    """
    # Throttle: only act once every 90 invocations.
    if context.counter < 90:
        context.counter += 1
        return
    context.counter = 0

    context.output = pipeline_output("Q500")
    # Restrict to one Morningstar industry group (code 30947).
    context.indices = context.output[context.output.ind == 30947].index

    # 90 days of prices -> 89 daily log returns per stock.
    prices = data.history(context.indices, "price", 90, "1d").dropna(axis=1)
    log_prices = np.log(prices.values)
    returns = np.diff(log_prices, axis=0)

    # First principal component = the common (group-wide) factor.
    factors = PCA(1).fit_transform(returns)

    # Regress each stock's returns on the factor; residuals are the
    # stock-specific moves. (Reconstructed -- `model` was undefined.)
    model = smapi.OLS(returns, smapi.add_constant(factors)).fit()
    betas = model.params.T[:, 1:]  # per-stock factor loadings, intercept dropped

    # Signal: recent 2-day residual move versus the 20-day baseline.
    R = sp.stats.zscore(model.resid[-2:, :].sum(axis=0)) - sp.stats.zscore(model.resid[-20:, :].sum(axis=0))
    weights = getW(R, betas)

    # Normalize to unit gross exposure; guard against an all-zero solution.
    denom = np.sum(np.abs(weights))
    if denom == 0:
        denom = 1.
    weights = weights / denom

    # Close positions that dropped out of the tradable universe.
    # (Loop variable renamed from `sid`, which shadowed the platform builtin.)
    for asset in context.portfolio.positions:
        if asset not in prices.columns:
            order_target(asset, 0)

    for i, asset in enumerate(prices.columns):
        order_target_percent(asset, weights[i])

def getW(signal, betas):
    """Solve for weights maximizing exposure to `signal`, subject to
    near-dollar-neutrality, unit gross leverage, per-name caps, and
    near-zero exposure to each extracted factor.

    Parameters
    ----------
    signal : (m,) array of per-stock alpha scores.
    betas  : (m, n) array of factor loadings (m stocks, n factors).

    Returns
    -------
    (m,) ndarray of weights; all zeros when the solver fails.
    """
    m, n = betas.shape
    x = cvx.Variable(m)
    objective = cvx.Maximize(signal.T * x)

    constraints = [
        cvx.abs(sum(x)) <= 0.001,  # ~dollar neutral (was a strict `<`, which
                                   # modern cvxpy rejects; old cvxpy treated
                                   # `<` as `<=` anyway)
        sum(cvx.abs(x)) <= 1,      # gross leverage cap
        x <= 3.5 / m,              # per-name long cap
        -x <= 3.5 / m,             # per-name short cap
    ]

    # Neutralize exposure to each factor.
    for i in range(n):
        constraints.append(cvx.abs(betas[:, i].T * x) <= 0.001)

    prob = cvx.Problem(objective, constraints)
    prob.solve(solver=cvx.CVXOPT)

    if prob.status != 'optimal':  # was Python-2-only `<>`
        print(prob.status)
        return np.asarray([0.] * m)

    return np.asarray(x.value).flatten()

def update_chart(context, data):
    """Record account leverage and the current long/short position counts."""
    record(leverage=context.account.leverage)

    longs = shorts = 0

    # `.values()` replaces Python-2-only `.itervalues()`.
    for position in context.portfolio.positions.values():
        if position.amount > 0:
            longs += 1
        elif position.amount < 0:
            shorts += 1

    record(l=longs, s=shorts)
There was a runtime error.

Hi Pravin -

I kinda get the impression that for the Q fund, anything that just uses OHLCV data will be low on the priority list (although in a multi-factor algo, I'd think that one would still want to mix in factors based solely on such data). Since in theory, Q only looks at algo "exhaust" I suppose they'll rely on self-reporting (and for a multi-factor algo, one would need to isolate the returns associated with factors based on novel data). For the contest, it is a different story (both because access to the novel data sets is limited, unless one is willing to buy them, and because one wants an algo that is capable of a nice 6-month run, which I think can be obtained with OHLCV data alone).

No goal Grant. Just sharing ideas to see if anything worthwhile can be made out of it.

Regarding datasets for contest, they purportedly have 50 datasets but I don't see anything meaningful beyond 4-5 (other than fundamentals). How are we supposed to find an edge with the limited datasets?

My understanding is that it is just too expensive to offer all of the data to the masses. Makes sense. However, as I understand, Q will run backtests over the full period, up to the present, and algos would be eligible for allocations.

I guess one approach would be to get an allocation but in the contract as a "manager" insist that one then gets free access to all Q data, for use only to create algos for the fund.

Hi,

Sorry to bump such an old thread but I am curious about the thinking here.

First off: my understanding (on a high level, please let me know if I misunderstood) is that you are trying to say that each industry has some kind of common driver (in this case the first principal component), and all companies in it should be reverting to a value predicted by this component.
So what you want to do here is to size your bets according to their deviation from the value predicted by the component.

That being understood I still cannot understand this line:

R = sp.stats.zscore(model.resid[-2:, :].sum(axis=0)) - sp.stats.zscore(model.resid[-20:, :].sum(axis=0))

I cannot really understand why the signal is the difference between two z-scores 19 days apart, or am I missing something here?