Back to Community
Trading with K-Means and LASSO Regression

my first trading algo

This strategy dynamically chooses the top-performing stocks (by Sharpe ratio) of each cluster, then uses an L1 regularization term (LASSO) to penalize the portfolio weights and achieve an all-long portfolio, with quarterly rebalancing — or at least, that's the goal here.

I added as many comments in the source code as possible to help elucidate, step-by-step, how the code operates. Below are some notes I took after messing with it for a bit:

  • More clusters results in lower beta (expected that)
  • Shifting market entry point by 17 days from month's start to try and coincide with the release of earnings reports resulted in -300% returns (unexpected, will have to test further to evaluate the best entry point)
  • Cluster size of 30 seemed to provide the best risk/return tradeoff
  • Kept penalization parameter at 2 for all tests
  • This algo would not survive the 2008-2009 financial crisis

Feedback and criticism are heavily encouraged and appreciated!! Particularly how to go about reducing the risk factors associated with this strategy

Clone Algorithm
218
Loading...
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
"""
Stocks are selected using k-means clustering to get a diversified portfolio. Uses L1 regularization term to sparsify the portfolio weights and achieve an all-long portfolio.

"""
import pandas as pd
import numpy as np
import cvxpy as cvx
import math
from quantopian.pipeline.data import morningstar
from quantopian.pipeline.filters.morningstar import Q1500US
from quantopian.pipeline import Pipeline
from quantopian.algorithm import attach_pipeline, pipeline_output
from sklearn.cluster import KMeans


def initialize(context):
    """Set up trading costs, the fundamentals pipeline, and the schedules."""
    # Quarterly rebalance bookkeeping: orders go out when
    # iMonths % NMonths == 1 (see place_order).
    context.iMonths = 0
    context.NMonths = 3

    # Execution-cost models: volume-share slippage and per-share
    # Interactive Brokers style commissions.
    set_slippage(slippage.VolumeShareSlippage(volume_limit=0.025,
                                              price_impact=0.1))
    set_commission(commission.PerShare(cost=0.0075, min_trade_cost=1.0))

    # Fundamentals pipeline that feeds the clustering step.
    attach_pipeline(make_pipeline(), 'fundamental_line')

    # Monthly pipeline: cluster -> optimize -> trade, all scheduled for
    # 90 minutes after the open on the first trading day of the month.
    for step in (cluster_analysis, calculate_weights, place_order):
        schedule_function(step,
                          date_rules.month_start(),
                          time_rules.market_open(hours=1, minutes=30))

    # Daily bookkeeping one minute before the close.
    schedule_function(record_vars,
                      date_rules.every_day(),
                      time_rules.market_close(minutes=1))
    
       
def before_trading_start(context, data):
    """Refresh the day's pipeline output before the market opens."""
    # pipeline_output returns the DataFrame built by make_pipeline
    # (columns: total_assets, total_revenues, net_income; one row per equity).
    context.output = pipeline_output('fundamental_line')
       
def make_pipeline():
    """Build the fundamentals pipeline screened to the liquid Q1500US universe."""
    # Latest balance-sheet / income-statement / cash-flow fundamentals;
    # the liquid universe makes the resulting trades easier to fill.
    columns = {
        'total_assets': morningstar.balance_sheet.total_assets.latest,
        'total_revenues': morningstar.income_statement.total_revenue.latest,
        'net_income': morningstar.cash_flow_statement.net_income.latest,
    }
    return Pipeline(columns=columns, screen=Q1500US())

def cluster_analysis(context, data):
    """
    Cluster the pipeline universe on a fundamentals-derived feature and
    pick the highest-Sharpe stock from each cluster.

    Reads context.output (pipeline DataFrame); writes context.x,
    context.results, context.returns_ (dates x equities) and
    context.best_equities for calculate_weights / place_order.
    """
    # Keep only equities with complete fundamental data.
    context.output = context.output.dropna(axis=0, how='any')
    equity_list = list(context.output.index)

    # Single feature per equity: equal blend of return on assets and
    # asset turnover (both scaled by total assets).
    # NOTE(review): relies on the pipeline column order being
    # (net_income, total_assets, total_revenues) -- confirm, since the
    # Pipeline columns dict does not guarantee ordering.
    x = np.zeros((len(context.output), 1))
    for i in range(len(context.output)):
        roa = context.output.iloc[i, 0] / context.output.iloc[i, 1]
        turnover = context.output.iloc[i, 2] / context.output.iloc[i, 1]
        x[i] = 0.5 * roa + 0.5 * turnover

    context.x = pd.DataFrame(data=x.T, columns=equity_list)

    # Partition the universe into n_clust groups on the blended feature.
    n_clust = 30
    alg = KMeans(n_clusters=n_clust, random_state=10, n_jobs=-1)
    context.results = alg.fit_predict(x)

    # Rolling window of daily prices -> simple returns (dates x equities).
    prices = data.history(assets=equity_list, fields='price',
                          bar_count=100, frequency='1d').dropna()
    context.returns_ = prices.pct_change().dropna()

    # Map each equity to its cluster label.
    cluster_list = pd.Series(data=context.results, index=equity_list)

    # Per-equity mean return and Sharpe ratio.
    # BUG FIX: the original used axis=1, averaging across equities for
    # each date -- the resulting "sharpes" were per-date numbers indexed
    # by position, which effectively selected stocks by column order
    # (hence the all-"A"/"B" symbol picks reported in the thread).
    # Statistics must run down each column: axis=0.
    mus = np.mean(context.returns_, axis=0)
    sharpes = mus / np.std(context.returns_, axis=0)

    # Highest-Sharpe equity per cluster.  BUG FIX: the best-so-far state
    # is reset for every cluster, so an empty cluster can no longer
    # inherit the previous cluster's winner (the original also used
    # tab-indented lines that violate Python's indentation rules).
    context.best_equities = []
    for label in range(n_clust):
        best_sharpe = None
        best_equity = None
        for equity in equity_list:
            if cluster_list[equity] != label or equity not in sharpes.index:
                continue
            if best_sharpe is None or sharpes[equity] > best_sharpe:
                best_sharpe = sharpes[equity]
                best_equity = equity
        if best_equity is not None:
            context.best_equities.append(best_equity)

    # Prevent same equities from getting ordered multiple times.
    context.best_equities = set(context.best_equities)
    print("best equities", context.best_equities)
    print("num of best securities", len(context.best_equities))
    
                    
def calculate_weights(context, data):
    """
    Solve the LASSO-penalized portfolio optimization over the per-cluster
    winners chosen by cluster_analysis.

    Reads context.returns_ (dates x equities) and context.best_equities;
    writes context.order_list and context.weights_list (one weight per
    equity, aligned with order_list) consumed by place_order.
    """
    # Restrict the return matrix to the selected equities.
    context.returns_ = context.returns_.T
    bad_list = [equity for equity in list(context.returns_.T)
                if equity not in context.best_equities]
    context.returns_ = context.returns_.drop(labels=bad_list)
    context.order_list = list(context.returns_.T)
    print("The order list", context.order_list)
    print("num of remaining securities", len(context.returns_))

    # Expected (mean) daily return per equity; rows are equities now.
    context.returns_ = np.asmatrix(context.returns_)
    mus = np.mean(context.returns_, axis=1)

    # L1 penalization strength: larger gamma -> sparser (more long-only)
    # weight vector.
    gamma = 2

    # Build and solve the cvxpy problem (old cvxpy 0.x '*' API):
    #   minimize ||mu'w - w'R||^2 + gamma * ||w||_1   s.t.  sum(w) == 1
    # (FIX: the original had a tab-indented comment here, which breaks
    # Python's mixed-indentation rules; the unused opt_value is dropped.)
    n = len(mus)
    w = cvx.Variable(n)
    R_weighted = w.T * context.returns_
    projected = mus.T * w

    cost = cvx.sum_squares(projected - R_weighted) + gamma * cvx.norm(w, 1)
    my_problem = cvx.Problem(cvx.Minimize(cost), [w.T * np.ones((n,)) == 1])
    my_problem.solve()

    # Weights come back in the same order as the return-matrix rows,
    # i.e. order_list.
    context.weights = np.asarray(w.value)
    context.weights_list = pd.Series(list(context.weights), context.order_list)
        
    
def place_order(context, data):
    """Submit the quarterly orders for the optimized portfolio.

    Scheduled monthly, but only trades when the month counter lands on
    the first month of a quarter; otherwise it is a no-op.
    """
    context.iMonths += 1
    is_rebalance_month = (context.iMonths % context.NMonths) == 1
    if not is_rebalance_month:
        return

    for security in context.order_list:
        if not data.can_trade(security):
            continue
        order_target_percent(security, context.weights_list[security])
                
def record_vars(context, data):
    """Record daily leverage and cumulative return for the backtest chart."""
    # Scheduled one minute before the close (see initialize).
    record(leverage=context.account.leverage, my_return=context.portfolio.returns)
               
There was a runtime error.
6 responses

The algorithm invests mostly in stocks whose symbols start with "A" or "B"; there may be an error in the code.

Loading notebook preview...

I noticed that as well. I'll go into the research environment in a bit to verify whether that's intended or not

Here's the revised source code, with backtest. The stocks definitely weren't being ordered correctly; I corrected the way the best stocks were passed through the algo, and modified the algo to liquidate all positions before re-evaluating the optimal portfolio. The leverage should be 1 all the way through, but during the trades for February 2011, May 2011, November 2012, and May 2013 (there were more, but these were the peaks), the leverage would jump to around 1.2-1.5

I'm pretty sure selling and shorting are two separate things, but are they synonymous for leverage? Is the liquidation causing this? Forgive me for my somewhat limited understanding of these risk metrics

Clone Algorithm
218
Loading...
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
"""
Stocks are selected using k-means clustering to get a diversified portfolio. Uses L1 regularization term to sparsify the portfolio weights and achieve an all-long portfolio.

"""
import pandas as pd
import numpy as np
import cvxpy as cvx
import math
from quantopian.pipeline.data import morningstar
from quantopian.pipeline.filters.morningstar import Q1500US
from quantopian.pipeline import Pipeline
from quantopian.algorithm import attach_pipeline, pipeline_output
from sklearn.cluster import KMeans



def initialize(context):
    """Set up trading costs, the fundamentals pipeline, and the schedules."""
    # Quarterly rebalance bookkeeping: orders go out when
    # iMonths % NMonths == 1 (see place_order).
    context.iMonths = 0
    context.NMonths = 3

    # Execution-cost models: volume-share slippage and per-share
    # Interactive Brokers style commissions.
    set_slippage(slippage.VolumeShareSlippage(volume_limit=0.025,
                                              price_impact=0.1))
    set_commission(commission.PerShare(cost=0.0075, min_trade_cost=1.0))

    # Fundamentals pipeline that feeds the clustering step.
    attach_pipeline(make_pipeline(), 'fundamental_line')

    # Counter reserved for closing out positions (not used elsewhere yet).
    context.counter = 0

    # Monthly pipeline: cluster -> optimize -> trade, 90 minutes after
    # the open on the first trading day of the month.  A separate
    # close_positions step was retired; place_order liquidates directly.
    for step in (cluster_analysis, calculate_weights, place_order):
        schedule_function(step,
                          date_rules.month_start(),
                          time_rules.market_open(hours=1, minutes=30))

    # Daily bookkeeping one minute before the close.
    schedule_function(record_vars,
                      date_rules.every_day(),
                      time_rules.market_close(minutes=1))
    
    
       
def before_trading_start(context, data):
    """Refresh the day's pipeline output before the market opens."""
    # pipeline_output returns the DataFrame built by make_pipeline
    # (columns: total_assets, total_revenues, net_income; one row per equity).
    context.output = pipeline_output('fundamental_line')
       
def make_pipeline():
    """Build the fundamentals pipeline screened to the liquid Q1500US universe."""
    # Latest balance-sheet / income-statement / cash-flow fundamentals;
    # the liquid universe makes the resulting trades easier to fill.
    columns = {
        'total_assets': morningstar.balance_sheet.total_assets.latest,
        'total_revenues': morningstar.income_statement.total_revenue.latest,
        'net_income': morningstar.cash_flow_statement.net_income.latest,
    }
    return Pipeline(columns=columns, screen=Q1500US())


                
def cluster_analysis(context, data):
    """
    Cluster the pipeline universe on a fundamentals feature and pick the
    highest-Sharpe stock from each cluster (252-day lookback).

    Writes context.x, context.results, context.prices and
    context.best_equities for calculate_weights / place_order.
    """
    # Keep only equities with complete fundamental data.
    context.output = context.output.dropna(axis=0, how='any')
    equity_list = list(context.output.index)

    # Single feature per equity: equal blend of return on assets and
    # asset turnover.  NOTE(review): relies on the pipeline column order
    # (net_income, total_assets, total_revenues) -- confirm.
    x = np.zeros((len(context.output), 1))
    for i in range(len(context.output)):
        roa = context.output.iloc[i, 0] / context.output.iloc[i, 1]
        turnover = context.output.iloc[i, 2] / context.output.iloc[i, 1]
        x[i] = 0.5 * roa + 0.5 * turnover

    context.x = pd.DataFrame(data=x.T, columns=equity_list)

    # K-means on the one-dimensional feature.
    n_clust = 30
    alg = KMeans(n_clusters=n_clust, random_state=10, n_jobs=-1)
    context.results = alg.fit_predict(x)

    # One year of daily prices; log returns per equity.
    context.prices = data.history(assets=equity_list, fields='price',
                                  bar_count=252, frequency='1d').dropna()
    close = np.asarray(context.prices).T  # rows = equities, cols = days

    # Vectorized log returns replace the per-element loop.  BUG FIX: the
    # original averaged over a row that kept a trailing zero placeholder
    # (R had `columns` slots but only `columns-1` returns), deflating
    # every mean; statistics now use actual returns only.
    log_returns = np.diff(np.log(close), axis=1)
    mus = np.mean(log_returns, axis=1)
    stds = np.std(log_returns, axis=1)
    sharpes = mus / stds

    # Map each equity to its cluster label.
    cluster_list = pd.Series(data=context.results, index=equity_list)

    # Best Sharpe per cluster.  BUG FIX: the original never reset
    # max_equity between clusters (and used tab-indented lines that break
    # Python's indentation rules), so an empty cluster re-appended the
    # previous cluster's winner, producing duplicate orders downstream.
    context.best_equities = []
    for label in range(n_clust):
        best_sharpe = None
        best_equity = None
        for j in range(len(sharpes)):
            if cluster_list.iloc[j] != label:
                continue
            if best_sharpe is None or sharpes[j] > best_sharpe:
                best_sharpe = sharpes[j]
                best_equity = cluster_list.index[j]
        if best_equity is not None:
            context.best_equities.append(best_equity)
    
    
                   
def calculate_weights(context, data):
    """
    Solve the LASSO-penalized portfolio optimization over the per-cluster
    winners and store the target weights.

    Reads context.prices and context.best_equities; writes
    context.order_list and context.weights_list (one weight per equity,
    aligned with order_list) for place_order.
    """
    # Restrict the price frame to the selected equities.  (FIX: the
    # original drop loop used tab indentation, which violates Python's
    # indentation rules.)
    context.prices = context.prices.dropna(axis=0, how='any')
    close_frame = context.prices
    for column in list(close_frame):
        if column not in context.best_equities:
            close_frame = close_frame.drop(column, axis=1)

    context.order_list = list(close_frame)

    # Log returns with rows = equities.  BUG FIX: the original kept a
    # trailing zero column in R, deflating every mean return.
    close = np.asarray(close_frame).T
    R = np.diff(np.log(close), axis=1)
    mus = np.mean(R, axis=1).reshape(-1, 1)

    # L1 penalization strength: larger gamma -> sparser weights.
    gamma = 5

    # minimize ||mu'w - w'R||^2 + gamma*||w||_1   s.t.  sum(w) == 1
    # (old cvxpy 0.x '*' API, matching the rest of the file)
    n = len(mus)
    w = cvx.Variable(n)
    R_weighted = w.T * R
    projected = mus.T * w
    cost = cvx.sum_squares(projected - R_weighted) + gamma * cvx.norm(w, 1)
    my_problem = cvx.Problem(cvx.Minimize(cost), [w.T * np.ones((n,)) == 1])
    my_problem.solve()

    context.weights = np.asarray(w.value)
    # BUG FIX: index the weights by order_list -- the actual column order
    # of the return matrix -- not by best_equities, whose ordering can
    # differ and whose duplicate entries misalign every order size (the
    # likely source of the reported leverage spikes).
    context.weights_list = pd.Series(list(context.weights), context.order_list)
    print("Portfolio positions", context.portfolio.positions)
    print("ORDERS", context.weights_list)
        
    
def place_order(context, data):
    """Quarterly: liquidate all held positions, then order the new portfolio.

    Scheduled monthly; trades only in the first month of each quarter.
    """
    context.iMonths += 1
    if (context.iMonths % context.NMonths) != 1:
        return

    # Close out everything held from the previous quarter first.
    positions = context.portfolio.positions
    for held in positions:
        if positions[held].amount != 0:
            order_target_percent(held, 0)
            print("Closing out", positions[held], held)

    # Then take the newly optimized positions.
    for security in context.best_equities:
        if data.can_trade(security):
            order_target_percent(security, context.weights_list[security])
                
    
def record_vars(context, data):
    """Record daily account leverage for the backtest chart."""
    # Scheduled one minute before the close (see initialize).
    record(leverage=context.account.leverage)
               
There was a runtime error.

So I changed the order logic a bit and dropped the clusters to 15, and the algo seems to perform decently from 2010 to 2017. This is my best effort to keep leverage near 1, and the algo is working mostly as intended, so I'll probably move on from this. I'm very thankful for all the wonderful resources on this website for research.

Clone Algorithm
218
Loading...
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
"""
Stocks are selected using k-means clustering to get a diversified portfolio. Uses L1 regularization term to sparsify the portfolio weights and achieve an all-long portfolio.

"""
import pandas as pd
import numpy as np
import cvxpy as cvx
import math
from quantopian.pipeline.data import morningstar
from quantopian.pipeline.filters.morningstar import Q1500US
from quantopian.pipeline import Pipeline
from quantopian.algorithm import attach_pipeline, pipeline_output
from sklearn.cluster import KMeans



def initialize(context):
    """Set up trading costs, the fundamentals pipeline, and the schedules."""
    # Quarterly rebalance bookkeeping: orders go out when
    # iMonths % NMonths == 1 (see place_order).
    context.iMonths = 0
    context.NMonths = 3

    # Execution-cost models (lower commissions for this test run).
    set_slippage(slippage.VolumeShareSlippage(volume_limit=0.025,
                                              price_impact=0.1))
    set_commission(commission.PerShare(cost=0.001, min_trade_cost=0))

    # Fundamentals pipeline that feeds the clustering step.
    attach_pipeline(make_pipeline(), 'fundamental_line')

    # Monthly pipeline: cluster -> optimize -> trade, 5 minutes after the
    # open on the first trading day of the month.  (The close_shorts
    # cleanup step was retired; place_order now liquidates positions that
    # fall out of the order list itself.)
    for step in (cluster_analysis, calculate_weights, place_order):
        schedule_function(step,
                          date_rules.month_start(),
                          time_rules.market_open(hours=0, minutes=5))

    # Daily bookkeeping one minute before the close.
    schedule_function(record_vars,
                      date_rules.every_day(),
                      time_rules.market_close(minutes=1))
    
    
       
def before_trading_start(context, data):
    """Refresh the day's pipeline output before the market opens."""
    # pipeline_output returns the DataFrame built by make_pipeline
    # (columns: total_assets, total_revenues, net_income; one row per equity).
    context.output = pipeline_output('fundamental_line')
       
def make_pipeline():
    """Build the fundamentals pipeline screened to the liquid Q1500US universe."""
    # Latest balance-sheet / income-statement / cash-flow fundamentals;
    # the liquid universe makes the resulting trades easier to fill.
    columns = {
        'total_assets': morningstar.balance_sheet.total_assets.latest,
        'total_revenues': morningstar.income_statement.total_revenue.latest,
        'net_income': morningstar.cash_flow_statement.net_income.latest,
    }
    return Pipeline(columns=columns, screen=Q1500US())


                
def cluster_analysis(context, data):
    """
    Cluster the pipeline universe on a fundamentals feature and pick the
    highest-Sharpe stock from each cluster (90-day lookback).

    Writes context.x, context.results, context.prices and
    context.best_equities for calculate_weights / place_order.
    """
    # Keep only equities with complete fundamental data.
    context.output = context.output.dropna(axis=0, how='any')

    # Single feature per equity: equal blend of return on assets and
    # asset turnover.  NOTE(review): relies on the pipeline column order
    # (net_income, total_assets, total_revenues) -- confirm.
    feature = pd.Series(index=context.output.index, dtype=float)
    for i in range(len(context.output)):
        roa = context.output.iloc[i, 0] / context.output.iloc[i, 1]
        turnover = context.output.iloc[i, 2] / context.output.iloc[i, 1]
        feature.iloc[i] = 0.5 * roa + 0.5 * turnover

    # BUG FIX: the original cleaned NaNs out of context.x but still fed
    # the raw (possibly NaN/inf) matrix to KMeans.fit_predict, which
    # raises at fit time.  Drop incomplete entries once, and keep the
    # feature vector and the traded universe aligned.
    feature = feature.replace([np.inf, -np.inf], np.nan).dropna()
    equity_list = list(feature.index)
    context.x = pd.DataFrame(data=[feature.values], columns=equity_list)

    # K-means on the one-dimensional feature.
    n_clust = 15
    alg = KMeans(n_clusters=n_clust, random_state=10, n_jobs=1)
    context.results = alg.fit_predict(feature.values.reshape(-1, 1))

    # 90 days of daily prices; log returns per equity.
    context.prices = data.history(assets=equity_list, fields='price',
                                  bar_count=90, frequency='1d').dropna()
    log_returns = np.diff(np.log(np.asarray(context.prices).T), axis=1)
    mus = np.mean(log_returns, axis=1)
    stds = np.std(log_returns, axis=1)
    sharpes = mus / stds

    # Map each equity to its cluster label.
    cluster_list = pd.Series(data=context.results, index=equity_list)

    # Best Sharpe per cluster.  BUG FIX: the best-so-far state is reset
    # per cluster (the original carried max_equity over between clusters,
    # and used tab-indented lines that break Python's indentation rules).
    context.best_equities = []
    for label in range(n_clust):
        best_sharpe = None
        best_equity = None
        for j in range(len(sharpes)):
            if cluster_list.iloc[j] != label:
                continue
            if best_sharpe is None or sharpes[j] > best_sharpe:
                best_sharpe = sharpes[j]
                best_equity = cluster_list.index[j]
        if best_equity is not None:
            context.best_equities.append(best_equity)
        
              
def calculate_weights(context, data):
    """
    Solve the LASSO-penalized portfolio optimization over the per-cluster
    winners and store the target weights.

    Reads context.prices and context.best_equities; writes
    context.order_list and context.weights_list (one weight per equity,
    aligned with order_list) for place_order.  Runs monthly, but orders
    only go out quarterly (see place_order).
    """
    # Restrict the price frame to the selected equities.  (FIX: the
    # original drop loop used tab indentation, which violates Python's
    # indentation rules, and a deprecated positional axis argument.)
    context.prices = context.prices.dropna(axis=0, how='any')
    close_frame = context.prices
    for column in list(close_frame):
        if column not in context.best_equities:
            close_frame = close_frame.drop(column, axis=1)

    context.order_list = list(close_frame)

    # Log returns with rows = equities.  BUG FIX: the original kept a
    # trailing zero column in R, deflating every mean return.
    close = np.asarray(close_frame).T
    R = np.diff(np.log(close), axis=1)
    mus = np.mean(R, axis=1).reshape(-1, 1)

    # L1 penalization strength: larger gamma -> sparser weights.
    gamma = 5

    # minimize ||mu'w - w'R||^2 + gamma*||w||_1   s.t.  sum(w) == 1
    # (old cvxpy 0.x API: '*' multiplication and sum_entries)
    n = len(mus)
    w = cvx.Variable(n)
    R_weighted = w.T * R
    projected = mus.T * w
    cost = cvx.sum_squares(projected - R_weighted) + gamma * cvx.norm(w, 1)
    my_problem = cvx.Problem(cvx.Minimize(cost), [cvx.sum_entries(w) == 1])
    my_problem.solve()

    # Weights align with order_list (the return-matrix row order).
    context.weights = np.asarray(w.value)
    context.weights_list = pd.Series(list(context.weights), context.order_list)
    print("Sum of the weights", np.sum(context.weights))
        
    
def place_order(context, data):
    """Quarterly: drop holdings outside the order list, then rebalance.

    Scheduled monthly; trades only in the first month of each quarter.
    """
    context.iMonths += 1
    if (context.iMonths % context.NMonths) != 1:
        return

    # Liquidate anything we hold that is no longer in the target list.
    for held in context.portfolio.positions:
        if held not in context.order_list:
            order_target(held, 0)
            print("Closing out", held)

    # Rebalance into the optimized weights.
    for security in context.order_list:
        if data.can_trade(security):
            order_target_percent(security, context.weights_list[security])
    
                        
# def close_shorts(context, data):
    
#     context.iMonths += 1
#     if (context.iMonths % context.NMonths) != 1:
#         return
#     try:
#         # If previous positions haven't been liquidated, make it happen here
#         for stock in context.portfolio.positions:
#                         if context.portfolio.positions[stock].amount != 0:
#                             order_target_percent(stock, 0 )
#                             print("Closing out", stock)
                            
#     except KeyError as b:
#         print("No shorts exist")
                 
                
def record_vars(context, data):
    """Record daily account leverage for the backtest chart."""
    # Scheduled one minute before the close (see initialize).
    record(leverage=context.account.leverage)
               
There was a runtime error.

I've simplified the code and made 2 changes:
1. Exclude single stock clusters
2. Maximum allocation to single stock limited to 30%

Clone Algorithm
119
Loading...
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
"""
Stocks are selected using k-means clustering to get a diversified portfolio. Uses L1 regularization term to sparsify the portfolio weights and achieve an all-long portfolio.

"""
import pandas as pd
import numpy as np
import cvxpy as cvx
import math
from quantopian.pipeline.data import morningstar
from quantopian.pipeline.filters.morningstar import Q1500US
from quantopian.pipeline import Pipeline
from quantopian.algorithm import attach_pipeline, pipeline_output
from sklearn.cluster import KMeans



def initialize(context):
    """Set up trading costs, the fundamentals pipeline, and the schedules."""
    # Quarterly rebalance bookkeeping: orders go out when
    # iMonths % NMonths == 1 (see place_order).
    context.iMonths = 0
    context.NMonths = 3

    # Execution-cost models.
    set_slippage(slippage.VolumeShareSlippage(volume_limit=0.025,
                                              price_impact=0.1))
    set_commission(commission.PerShare(cost=0.001, min_trade_cost=0))

    # Fundamentals pipeline that feeds the clustering step.
    attach_pipeline(make_pipeline(), 'fundamental_line')

    # Single monthly driver runs cluster -> optimize -> trade in order,
    # 5 minutes after the open on the first trading day of the month.
    schedule_function(schedule,
                      date_rules.month_start(),
                      time_rules.market_open(hours=0, minutes=5))

    # Daily bookkeeping one minute before the close.
    schedule_function(record_vars,
                      date_rules.every_day(),
                      time_rules.market_close(minutes=1))
    
    
       
def before_trading_start(context, data):
    """Refresh the day's pipeline output before the market opens."""
    # pipeline_output returns the DataFrame built by make_pipeline
    # (columns: total_assets, total_revenues, net_income; one row per equity).
    context.output = pipeline_output('fundamental_line')
       
def make_pipeline():
    """Build the fundamentals pipeline screened to the liquid Q1500US universe."""
    # Latest balance-sheet / income-statement / cash-flow fundamentals;
    # the liquid universe makes the resulting trades easier to fill.
    columns = {
        'total_assets': morningstar.balance_sheet.total_assets.latest,
        'total_revenues': morningstar.income_statement.total_revenue.latest,
        'net_income': morningstar.cash_flow_statement.net_income.latest,
    }
    return Pipeline(columns=columns, screen=Q1500US())

def schedule(context, data):
    """Monthly driver: run the full cluster -> optimize -> trade pipeline."""
    # Order matters: each step consumes state written by the previous one
    # (best_equities and prices feed the optimizer; weights feed orders).
    cluster_analysis(context, data)
    calculate_weights(context, data)
    place_order(context, data)
                
def cluster_analysis(context, data):
    """
    Build a fundamentals feature, cluster the universe with k-means, and
    select the best-Sharpe stock from every multi-stock cluster.

    Reads context.output (pipeline columns net_income / total_assets /
    total_revenues); writes context.prices and context.best_equities.
    """
    # Feature: equal blend of return on assets and asset turnover.
    context.output['return_on_asset'] = context.output.net_income / context.output.total_assets
    context.output['asset_turnover'] = context.output.total_revenues / context.output.total_assets
    context.output['feature'] = 0.5 * context.output.return_on_asset + 0.5 * context.output.asset_turnover
    # Division by zero assets yields +/-inf; normalize to NaN and drop
    # incomplete rows so KMeans sees finite values only.
    context.output = context.output.replace([np.inf, -np.inf], np.nan).dropna(axis=0, how='any')
    
    equity_list = context.output.index
    
    # Run k-means on the single-column feature matrix.
    n_clust = 15
    alg = KMeans(n_clusters=n_clust, random_state=10, n_jobs=1)
    cluster_results = alg.fit_predict(context.output[['feature']])
    context.output['cluster'] = pd.Series(cluster_results, index=equity_list)
    
    # Remove single-stock clusters: a lone stock would get its cluster's
    # slot regardless of quality.
    cluster_count = context.output.cluster.groupby(context.output.cluster).count()
    context.output = context.output[context.output.cluster.isin(cluster_count[cluster_count > 1].index)]
    
    print('cluster result', cluster_count)
    
    # 90 trading days of prices; dropna removes dates with missing bars.
    context.prices = data.history(assets=equity_list, fields='price', bar_count=90, frequency='1d').dropna()
    
    log_return = context.prices.apply(np.log).diff()
    
    # Per-equity mean log return, volatility, and daily Sharpe ratio.
    mean_return = log_return.mean().to_frame(name='mean_return')
    return_std = log_return.std().to_frame(name='return_std')
    context.output = context.output.join(mean_return).join(return_std)
    context.output['sharpes'] = context.output.mean_return / context.output.return_std
    
    # Top Sharpe per cluster; nlargest yields a (cluster, equity)
    # MultiIndex, so level 1 holds the winning equities.
    top_n_sharpes = 1
    best_n_sharpes = context.output.groupby('cluster')['sharpes'].nlargest(top_n_sharpes)
    #print('best n sharpes', best_n_sharpes)
    
    context.best_equities = best_n_sharpes.index.get_level_values(1)
    
def calculate_weights(context, data):
    """
    Solve the LASSO-penalized portfolio optimization over the selected
    equities and store the long-only target weights.

    Reads context.prices and context.best_equities; writes
    context.weights_list (one weight per equity) for place_order.
    """
    # Log returns restricted to the per-cluster winners.
    log_return = context.prices[context.best_equities].apply(np.log).diff().dropna()
    mean_return = log_return.mean().to_frame(name='mean_return')
    
    # rows = equities, cols = days; mus is the expected-return vector.
    R = log_return.values.T
    mus = mean_return.values
    
    # L1 penalization strength: larger gamma -> sparser weights.
    gamma = 5

    # minimize ||mu'w - w'R||^2 + gamma*||w||_1
    # subject to: weights sum to 1, long-only, max 30% per stock.
    # (old cvxpy 0.x API: '*' multiplication and sum_entries)
    n = len(mus)
    w = cvx.Variable(n)
    R_weighted = w.T*R
    mus = mus.T*w

    cost = cvx.sum_squares(mus-R_weighted) + gamma * cvx.norm(w, 1)
    my_problem = cvx.Problem(cvx.Minimize(cost), [cvx.sum_entries(w)==1, w>=0, w<=0.3])
    opt_value = my_problem.solve()
    weights = np.asarray(w.value).T[0]
    # Weights align with best_equities, which is also the column order of
    # the log_return frame built above.
    context.weights_list = pd.Series(weights, index=context.best_equities)
    print("Sum of the weights", context.weights_list.sum())
    print("ORDERS", context.weights_list)
    
    
def place_order(context, data):
    """Quarterly: drop holdings outside the weight list, then rebalance.

    Scheduled monthly; trades only in the first month of each quarter.
    """
    context.iMonths += 1
    if (context.iMonths % context.NMonths) != 1:
        return

    # Liquidate anything held that is not in the new target portfolio.
    for held in context.portfolio.positions:
        if held not in context.weights_list.index:
            order_target(held, 0)
            print("Closing out", held)

    # Rebalance into the optimized weights.
    for security in context.weights_list.index:
        if data.can_trade(security):
            order_target_percent(security, context.weights_list[security])

def record_vars(context, data):
    """Record daily account leverage for the backtest chart."""
    # Scheduled one minute before the close (see initialize).
    record(leverage=context.account.leverage)
               
There was a runtime error.

Hey, thanks xelio! I wasn't quite sure how to add those kinds of constraints, but your source code makes things much clearer. It'll definitely come in handy for my mean-reversion algo.