Back to Community
Method to get historic values from fetcher data

When using fetcher, the History function currently doesn't return the values fetched from data. One is limited to using the standard price, mavg, etc. If you want more than that, you must collect the values yourself in an array, which means you have created a warm-up period for the algorithm. This can be tolerated for backtesting, but is a real problem for live algorithms.

Thanks to Seong Lee of the Quantopian engineering staff, who came up with this clever workaround which will allow me to relaunch my live algorithm this week, using a vix slope calculation.

The algorithm gets around the limitation of returning a single item per day, by stuffing all the values into a string and passing back that single string, and then splitting the string and converting it back to numeric values.

I've tested it live and in simulation.

Clone Algorithm
64
Loading...
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
# https://secure.helpscout.net/conversation/49460524/1228/

from datetime import timedelta
from pytz import timezone
import statsmodels.api as sm
import numpy as np
import pandas as pd
import talib

liveTrading = False

        
def custom_split(string_list):
    """
    Parse the string form of a list of numbers back into a list of floats.

    Inverse of the ``str(past_data)`` packing done in fill_func: strips the
    surrounding '[' and ']', splits on commas, and converts each entry.

    :param string_list: a list rendered as a string, e.g. "[1.0, 2.5]"
    :returns: the corresponding list of floats, e.g. [1.0, 2.5]
    """
    #: Remove the '[' and ']'
    inner = string_list[1:-1].strip()
    #: Guard the empty case: fill_func can return str([]) == "[]", and
    #: splitting that would feed '' to float() and raise ValueError.
    if not inner:
        return []
    #: Convert each comma-separated entry to float
    return [float(s) for s in inner.split(',')]

def get_day_delta(current_date):
    """
    Return how many calendar days to step back to reach the previous weekday.

    A Monday must reach back over the weekend to Friday (3 days); every
    other day just steps back a single day.

    :param current_date: a pandas Timestamp / datetime-like with isoweekday()
    :returns: int — 3 on Mondays, 1 otherwise
    """
    return 3 if current_date.isoweekday() == 1 else 1
 
def fill_func(df, row, num_dates):
    """
    Build, for one row, a string-packed list of the past `num_dates` prices.

    Applied to every row of the fetcher dataframe.  Walks backwards one
    weekday at a time from the row's own date, collects the 'price' value
    for each date present in the frame, and returns the collected values as
    a single string (fetcher only passes back scalar cells, so the list is
    smuggled through as its str() form and unpacked later by custom_split).

    :param df: the dataframe in its totality (indexed by date)
    :param row: the row being processed; row.name is its date
    :param num_dates: how many dates to go back (e.g. 30 = 30 days of past data)
    :returns: str(list_of_prices), to be unpacked later with custom_split
    """
    #: Instantiate variables
    past_data = []
    #: The current date is the name of the Series (row) being passed in
    current_date = row.name

    #: Iterate through the number of dates from 0->num_dates
    for i in range(num_dates):
        #: In live trading the first sample is today itself (delta 0) to
        #: work around the one-day data delay; otherwise step back a weekday.
        if liveTrading and (i == 0):
            day_delta = 0
        else:
            day_delta = get_day_delta(current_date)

        #: Walk current_date back by day_delta to the appropriate past date
        current_date = current_date - timedelta(days=day_delta)
        try:
            #: Label-based lookup via .loc (.ix is deprecated and removed
            #: from pandas); a missing date raises KeyError just like .ix did
            data = df.loc[current_date]['price']
            past_data.append(data)
        except KeyError:
            #: No data for this date (holiday / missing row), skip it
            pass

    #: Return the list packed into a string
    return str(past_data)

   
def post_func(df):
    """
    Fetcher post-processing: normalize columns and attach packed history.

    Renames Yahoo's 'Adj Close' to 'price', forward-fills gaps, keeps only
    the columns the algorithm needs, and adds a 'past_data' column holding
    32 days of history packed into a string per row (see fill_func).
    """
    renamed = df.rename(columns={'Adj Close': 'price'})
    filled = renamed.fillna(method='ffill')
    trimmed = filled[['price', 'sid']]
    #: One string-packed history list per row, built by our custom function
    trimmed['past_data'] = trimmed.apply(lambda row: fill_func(trimmed, row, 32), axis=1)
    return trimmed
# the other methods in your algorithm.

def initialize(context):
    """One-time setup: seed daily-trigger state, register the VIX fetcher, pick XIV."""
    # Day-of-month of the last bar handled; lets handle_data fire once per day
    context.yesterday = 0
    yahoo_vix_url = "http://ichart.finance.yahoo.com/table.csv?s=%5EVIX&d=1&e=7&f=2015&g=d&a=7&b=13&c=2010&ignore=.csv"

    context.target_notional = 10 ** 6
    context.init_amts = True
    fetch_csv(yahoo_vix_url,
              date_column='Date',
              date_format='%Y-%m-%d',
              symbol='vix',
              usecols=['Adj Close'],
              post_func=post_func)
    context.xiv = sid(40516)  # xiv


def inmorningwindow(context):
    """
    Return True exactly once per day, at 9:45 US/Eastern.

    All timestamps are converted to US Eastern to avoid confusion.  As a
    side effect, refreshes context.isMonday for the rest of the algorithm.
    """
    # Already handled a bar today — not in the window again
    if get_datetime().day == context.yesterday:
        return False
    loc_dt = get_datetime().astimezone(timezone('US/Eastern'))
    context.isMonday = (loc_dt.weekday() == 0)
    if loc_dt.hour != 9 or loc_dt.minute != 45:
        return False
    context.yesterday = get_datetime().day
    return True

def CalcSlope(array, length):
    """
    OLS-fit the last `length` points of `array` against time.

    Time is coded as -(length-1)..0, so the intercept is the fitted value
    at the most recent point.

    :returns: (intercept, slope)
    """
    window = array[-length:]
    times = sm.add_constant(range(-len(window) + 1, 1))
    fitted = sm.OLS(window, times).fit()
    intercept, slope = fitted.params
    return intercept, slope

def handle_data(context, data):
    """
    Per-bar driver: once per day (9:45 ET) unpack the string-packed VIX
    history, compute a 10-day OLS slope, and record it.
    """
    # Need our tradable sid to be present in this bar
    if not (context.xiv and context.xiv in data):
        return

    # Get the vix data fetched via fetch_csv
    vix_data = data['vix']

    # Exit if the vix data was not fetched
    if vix_data is None:
        print("no vix data")
        return

    if not inmorningwindow(context):
        return

    ##################################################
    #: Example of string unpacking
    try:
        #: The initial error-checker pass may not have 'past_data' populated
        #: yet; once handle_data runs normally this succeeds every bar.
        vix_data_list = custom_split(vix_data['past_data'])
    except Exception:
        # narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are never silently swallowed
        return

    # Reverse the list so it runs oldest -> newest (standard Quantopian order)
    vix_data_list = vix_data_list[::-1]

    vix_intersect, vix_slope = CalcSlope(vix_data_list, 10)
    ##################################################

    record(vix=vix_data_list[-1], slope10X=vix_slope * 10)
    
    
There was a runtime error.
26 responses

Richard,
Thanks for sharing, fetcher/history issues are something I've run into as well. I'm not 100% sure if this will apply to your specific case, but if you would like to do historical calculations on fetched data, there is one other option I have found.

If you add a pre_func argument to the fetch_csv function, you will have access to the entire dataframe while in that function. That means you can apply rolling calculations on the data and append the results as new columns. This example here adds a moving average and standard deviation field to the VIX data. You will have to be careful not to introduce look ahead bias though.

David

Clone Algorithm
49
Loading...
Total Returns
--
Alpha
--
Beta
--
Sharpe
--
Sortino
--
Max Drawdown
--
Benchmark Returns
--
Volatility
--
Returns 1 Month 3 Month 6 Month 12 Month
Alpha 1 Month 3 Month 6 Month 12 Month
Beta 1 Month 3 Month 6 Month 12 Month
Sharpe 1 Month 3 Month 6 Month 12 Month
Sortino 1 Month 3 Month 6 Month 12 Month
Volatility 1 Month 3 Month 6 Month 12 Month
Max Drawdown 1 Month 3 Month 6 Month 12 Month
# https://secure.helpscout.net/conversation/49460524/1228/

from datetime import timedelta
from pytz import timezone
import statsmodels.api as sm
import numpy as np
import pandas as pd
import talib



def pre_func(df):
    """
    Fetcher pre-processing: attach 30-day rolling mean/std of the VIX close.

    Yahoo serves rows newest-first, so the rolling window must be taken over
    the chronologically-ordered series — rolling over the raw frame would
    average *future* values (the look-ahead bias noted in this thread).
    Results are assigned back by index, so the frame's row order is kept.
    Also uses Series.rolling(), since pd.rolling_mean/pd.rolling_std were
    removed from pandas.
    """
    vix = df['Adj Close']
    #: Reverse into chronological order before rolling; pandas then aligns
    #: the result back onto the original (reverse-chronological) index.
    vix_chronological = vix[::-1]
    df['mean'] = vix_chronological.rolling(30).mean()
    df['std'] = vix_chronological.rolling(30).std()
    return df

def initialize(context):
    """One-time setup: seed daily-trigger state and register the VIX fetcher."""
    # Day-of-month of the last bar handled, used by handle_data
    context.yesterday = 0
    yahoo_vix_url = "http://ichart.finance.yahoo.com/table.csv?s=%5EVIX&d=1&e=7&f=2015&g=d&a=7&b=13&c=2010&ignore=.csv"

    context.target_notional = 10 ** 6
    context.init_amts = True
    fetch_csv(yahoo_vix_url, date_column='Date', date_format='%Y-%m-%d',
              symbol='vix', usecols=['Adj Close'], pre_func=pre_func)
    context.xiv = sid(40516)

def handle_data(context, data):
    """Record the fetched VIX close plus its 30-day rolling mean and std."""
    if context.xiv not in data:
        return

    # Fetched VIX row for this bar
    vix_data = data['vix']
    if vix_data is None:
        print("no vix data")
        return
    # The rolling columns only exist once the csv pre-processing supplied them
    if 'mean' not in vix_data or 'std' not in vix_data:
        return

    record(mean=vix_data['mean'],
           std=vix_data['std'],
           price=vix_data['Adj Close'])
    print(vix_data)


    
There was a runtime error.

David
Seong Lee suggested this to me first, but I wanted something a bit more generic, because I have a lot of code and I wanted the flexibility of having the array of values to manipulate. But it's good that you mentioned it, because for many people it's probably a better solution.

Rich

I just discovered the need for this, but I think I'll have to do the packed string method - I need to calculate historical spreads between various fetched time-series. Thanks, I look forward to a better way of doing this one day though!

Hi David — what is the difference between the first chart and your chart? Are you using the VIX as a buy/sell signal, or is it already built into the algo so that when the VIX spikes it automatically buys some stocks? Puzzled.

@David - Don't the mean and std seem to be going right to left in your chart? Have in mind that Yahoo historical data comes in reverse-chronological order.

Just realized fetch_csv does not work in live mode. I am stunned that the fetch_csv function was even introduced without proper support in live execution. Now my algo is stuck in backtest mode without this brittle hack.

It works for me, you just can't enter them into the contest. What is the issue you are seeing?

https://www.quantopian.com/posts/function-like-history-but-for-fetch-csv-data

This is the unpleasant hack I use to get access to some number of historical rows of data from fetch_csv, in a way that avoids look-ahead bias. It's been working well enough for a couple of months of live trading though.

EDIT: And all due credit to the above Richard Diehl and Seong Lee for the original idea!

Thank you Simon, I will have a look!

The main issue as I see with these workarounds is that they increase risk of bugs in live algorithms, where you'd least want increased risk. I'd be much happier Quantopian built a consistent API where the difference between live and backtesting was minimal.

Yeah I absolutely agree, but I think fetch_csv in general is something that Quantopian has less interest in supporting these days, so I think it's basically take-it-or-leave-it. Don't want to speak for them though.

Good motivation to keep alternative options on the back burner!

Yeah, the new pivot seems to be selling data so the motivation for providing methods for people to get their own is diminishing I guess!

@Klon - I'd be much happier Quantopian did not release tools that allow you to calculate moving averages based on future data; and when the bug was discovered (see July 25, 2015, above), admit and correct it, rather than just quietly disallowing it in the contest.

Yeah, fetch_csv can be tricky to use properly.

There is no simple way. To do it correctly, you must do it the complicated way.

Perhaps try printing the specific value you are interested in, the same way you are recording it.

data.current('vix', 'Adj Close')

I posted a simple version of saving the data to an array:

https://www.quantopian.com/posts/fetcher-history-calculate-moving-average

Very good idea about adding a column of data...that really just skips the need to capture the array. Thinking about it now, I would just process the dataframe and then replace it with the values I need on a daily basis.

Hi Winston ,

See if this works for you. Just make sure to spot check a few values to make sure my logic is right.

from collections import deque

def initialize(context):
    """Set up a 5-day history deque per VIX field and register the fetcher."""
    # Fields we track from the Yahoo csv
    context.vix_fields = ['Adj Close', 'High', 'Low', 'Open']
    context.vix = {}
    for field in context.vix_fields:
        # deque with maxlen 5: keeps only the most recent five days per field
        context.vix[field] = deque([], 5)

    url = 'http://ichart.finance.yahoo.com/'
    # fix: removed a stray trailing double-quote that was pasted into the URL
    url += 'table.csv?s=%5EVIX&d=0&e=1&f=2017&g=d&a=0&b=1&c=2010&ignore=.csv'
    fetch_csv(url,
              date_column='Date',
              date_format='%Y-%m-%d',
              symbol='vix',
              usecols=context.vix_fields)

def before_trading_start(context, data):
    """Pull today's fetched VIX fields into the rolling deques and log them."""
    for field in context.vix_fields:
        context.vix[field].appendleft(data.current('vix', field))

    # today's open (index 0 holds the newest value)
    print('open - %.2f' % context.vix['Open'][0])

    # yesterday's high/low (check that we have more than one day's data)
    if len(context.vix['Open']) > 1:
        print('yesterday high/low - %.2f/%.2f' % (context.vix['High'][1], context.vix['Low'][1]))

Hi Winston and thanks :-)

Here, I will give you the cliff notes:

initialize:
- create array of csv fields at context.vix_fields
- create dictionary at context.vix
- initialize dictionary with deques (see https://docs.python.org/2/library/collections.html#deque-objects)
- make call for csv file

before_trading_starts:
- loop through field names, get data value for each field and add to context.vix dict

context.vix contains last five days of data. You can change deque([], 5) to some other number.

Hi Winston,

this works:

    if len(context.vix['Adj Close']) > 1:  
        vix_close_yesterday = context.vix['Adj Close'][1]  
        print vix_close_yesterday  

basically, what is happening is we are creating the data structure in the initialization function. The data structure looks like this:

context.vix = {  
    'Open': deque([], 5),  
    'High': deque([], 5),  
    'Low': deque([], 5),  
    'Adj Close': deque([], 5)  
}

For reference:
https://docs.python.org/2/tutorial/datastructures.html#dictionaries
https://docs.python.org/2/library/collections.html#deque-objects

The deque is initialized with an empty list as a container and will be max length of 5 elements. Then in the before_trading_start function, which is called everyday, we are filling in the data. We get an index error if we try to access an element before the data has been filled in. Once we get the five elements, going on older values are dropped and new values are added.

So, at the start of the backtest, if we want to access context.vix['High'][1] first we need to test that context.vix['High'] has a length greater than one (since lists are indexed starting at zero). To access context.vix['High'][2], test that the deque has a length greater than two, etc. up to context.vix['High'][4] which needs a length greater than four because we want the fifth element.

If you need more than five days, change the deque number in the initialization function.

Hope that helps...it is harder to explain than it is to code :-)

Less fancy, which is sometimes better!

from collections import deque

def initialize(context):
    """Create 5-day history deques for each VIX field and register the fetcher."""
    context.vix = {
        'Open': deque([], 5),
        'High': deque([], 5),
        'Low': deque([], 5),
        'Adj Close': deque([], 5)
    }
    url = 'http://ichart.finance.yahoo.com/table.csv'
    # fix: removed a stray trailing double-quote that was pasted into the URL
    url += '?s=%5EVIX&d=0&e=1&f=2017&g=d&a=0&b=1&c=2010&ignore=.csv'
    fetch_csv(url, date_column='Date', symbol='vix')

def before_trading_start(context, data):
    """Bank today's VIX OHLC values and log yesterday's once available."""
    for field in ['Open', 'High', 'Low', 'Adj Close']:
        context.vix[field].appendleft(data.current('vix', field))

    # index 0 is today's (newest) value
    print('open - %.2f' % context.vix['Open'][0])

    # index 1 is yesterday's value — only valid once two days are banked
    if len(context.vix['Open']) > 1:
        print('high - %.2f' % context.vix['High'][1])
        print('low - %.2f' % context.vix['Low'][1])
        print('close - %.2f' % context.vix['Adj Close'][1])

Note that %.2f rounds the float. See:
https://docs.python.org/2/library/string.html#formatspec

Trouble with these methods is they take time to "warm up" in live trading. That becomes more tedious when you need 30+ days of history.

True, Simon. Winston only needs five days of data. Of course, if you need 30 days of data, you can do something like this:

def shift(df):
    """
    Add a 'Last Month Close' column taken from 30 rows further down the frame.

    Yahoo serves rows newest-first, so 30 rows down is roughly one trading
    month earlier; rows near the end get NaN (no older data available).
    """
    df['Last Month Close'] = df['Close'].shift(-30)
    return df

def initialize(context):
    """Register the VIX fetcher with the shift() pre-processor attached."""
    url = 'http://ichart.finance.yahoo.com/table.csv'
    # fix: removed a stray trailing double-quote that was pasted into the URL
    url += '?s=%5EVIX&d=0&e=1&f=2017&g=d&a=0&b=1&c=2010&ignore=.csv'
    fetch_csv(url, date_column='Date', symbol='vix', pre_func=shift)

def before_trading_start(context, data):
    """Log today's Close alongside the close from roughly one month back."""
    for field in ['Close', 'Last Month Close']:
        value = data.current('vix', field)
        print('%s - %.2f' % (field, value))

There's more than one way to skin a cat :-)

Extend that to an arbitrary number of days in the past, and you might get a solution similar to the one I posted last year. :)

haha...I guess there is always someone reinventing the wheel :-)

Hey, thanks too, Winston...I also figured out a bunch of stuff working through this with you!