Monday, 17 August 2015

Quantopian Algorithmic Trading Strategy: Statistical Arbitrage (Pairs Trading)

What is Statistical Arbitrage?

Statistical arbitrage refers to situations where securities are mispriced relative to one another and traders aim to profit from that mispricing. It is based on mean reversion: the spread between the two stocks is expected to be constant, so any deviation from that constant spread is a trading opportunity, because the spread should revert to its mean. For this reason the two stocks we focus on should be in the same sector, as this takes away market-wide influences and gives us a fundamental reason for believing the spread is mean reverting or constant. Mathematically, cointegration tests whether two stocks work well in a pairs trade: the pair is cointegrated when the error term of the regression of one price on the other is stationary. This is also why cointegration is important in time series analysis. A stationary relationship means the hedge ratio remains constant.


import numpy as np
import statsmodels.api as sm
import pandas as pd
from zipline.utils import tradingcalendar
import pytz


def initialize(context):
    """Set up the trading-cost model, the traded pair and the strategy state.

    Called with the algorithm's ``context`` object, on which all settings
    and running state used by ``handle_data`` are stored as attributes.
    """
    # Slippage and commission models, with the parameters shown, applied to
    # every simulated order.
    set_slippage(slippage.VolumeShareSlippage(volume_limit=0.025, price_impact=0.1))
    set_commission(commission.PerShare(cost=0.0075, min_trade_cost=0.0))
    # The pair: y is regressed on x when the hedge ratio is estimated.
    context.y = symbol('ANF')  # Abercrombie & Fitch
    context.x = symbol('AEO')  # American Eagle Outfitters

The next setting defines whether to use a lagged hedge ratio (here with a lag of 2) rather than the most recent one. Because we are trading on mean reversion, a lagged hedge ratio leaves out the most recent price data, so it is not distorted by the large recent divergences that the algorithm hopes to exploit and stays more closely aligned with the historical economic relationship of the stock pair. A hedge ratio is defined as the ratio of the value of the position protected by a hedge to the size of the entire position. For example, an investor with $1000 in foreign shares is exposed to currency risk; if $500 of those shares is hedged with a currency position, the hedge ratio is 0.5, i.e. 50% of the equity position is protected from currency risk.

    # When True, trade on an older hedge-ratio estimate instead of the one
    # computed on the current bar.
    context.use_hedge_ratio_lag = True
    # Index (counted from the end) into the hedge-ratio history:
    # 1 = the estimate just computed, 2 = the one before it.
    context.hedge_ratio_lag = 2


    context.lookback = 20     # used for regression
    context.z_window = 20     # used for zscore calculation, must be <= lookback
    context.entry_z = 1.0      # a trade is entered when the z-score moves beyond +entry_z or -entry_z
    context.exit_z = 0.0       # a short is closed once the z-score falls below exit_z, a long once it rises above it

    # Running state carried between handle_data calls.
    context.spread = np.array([])                # one spread value per trading decision
    context.hedge_ratio_history = np.array([])   # one hedge-ratio estimate per trading decision
    context.in_long = False                      # True while long y / short x
    context.in_short = False                     # True while short y / long x

    if not context.use_hedge_ratio_lag:
        # a lag of 1 means to include the most recent price in the hedge_ratio calculation
        # specifically, this is used for np.array[-1] indexing
        context.hedge_ratio_lag = 1
   
For every trade event...

def handle_data(context, data):
    """Evaluate the pair once per day, at 15:30 US/Eastern, and trade the spread.

    On the qualifying bar the function:

    1. regresses the last ``context.lookback`` daily prices of ``context.y``
       on those of ``context.x`` to get a hedge ratio and appends it to
       ``context.hedge_ratio_history``;
    2. computes today's spread ``y - hedge * x`` with the hedge ratio found
       ``context.hedge_ratio_lag`` entries from the end of that history and
       appends it to ``context.spread``;
    3. once more than ``context.z_window`` spreads exist, z-scores the
       latest spread against the last ``z_window`` spreads and either
       closes the open position, opens a new one, or does nothing.

    Args:
        context: Algorithm state object populated by ``initialize``.
        data: Current bar data supplied by the platform (not used here).

    Returns:
        None. Effects are the orders placed, the values recorded and the
        updates to ``context``.
    """
    # Do nothing while earlier orders are still open.
    if get_open_orders():
        return

    now = get_datetime()
    exchange_time = now.astimezone(pytz.timezone('US/Eastern'))

    # Only trade 30 minutes before the regular 16:00 market close.
    if not (exchange_time.hour == 15 and exchange_time.minute == 30):
        return

    # Only 35 daily bars are requested, so a lookback above 35 would
    # silently be truncated to 35 rows by this slice.
    prices = history(35, '1d', 'price').iloc[-context.lookback:]

    y = prices[context.y]
    x = prices[context.x]

    try:
        hedge = hedge_ratio(y, x, add_const=True)
    except ValueError as e:
        # The regression could not be fitted; skip today's decision.
        log.debug(e)
        return

    context.hedge_ratio_history = np.append(context.hedge_ratio_history, hedge)
    # Not enough estimates yet to look back hedge_ratio_lag entries.
    if context.hedge_ratio_history.size < context.hedge_ratio_lag:
        return

    # Lagged hedge ratio: index -1 is the estimate just appended, -2 the one
    # appended on the previous trading decision.
    hedge = context.hedge_ratio_history[-context.hedge_ratio_lag]

    # Use explicit positional access: the price series are indexed by
    # timestamps, so plain [-1] depends on pandas' deprecated fallback from
    # label lookup to positional lookup.
    y_price = y.iloc[-1]
    x_price = x.iloc[-1]

    # Calculate the current day's spread and add it to the running tally.
    context.spread = np.append(context.spread, y_price - hedge * x_price)

    if context.spread.size <= context.z_window:
        return

    # Keep only the z-score lookback period.
    spreads = context.spread[-context.z_window:]
    spread_std = spreads.std()
    if spread_std == 0:
        # A flat spread carries no signal; dividing would give nan/inf.
        return
    zscore = (spreads[-1] - spreads.mean()) / spread_std

    # Exit: a short closes once the z-score drops below exit_z, a long once
    # it rises above exit_z.
    exit_short = context.in_short and zscore < context.exit_z
    exit_long = context.in_long and zscore > context.exit_z
    if exit_short or exit_long:
        order_target(context.y, 0)
        order_target(context.x, 0)
        context.in_short = False
        context.in_long = False
        record(stock_Y_pct=0, stock_X_pct=0)
        return

    # Entry: +1 buys the spread (long y, short x), -1 sells it. A signal is
    # ignored when a position in that same direction is already open.
    if zscore < -context.entry_z and (not context.in_long):
        direction = 1
    elif zscore > context.entry_z and (not context.in_short):
        direction = -1
    else:
        return

    context.in_long = direction > 0
    context.in_short = direction < 0

    # One share of y against `hedge` shares of x on the opposite side,
    # converted to portfolio weights.
    (y_target_pct, x_target_pct) = compute_holdings_pct(direction,
                                                        -direction * hedge,
                                                        y_price, x_price)
    order_target_percent(context.y, y_target_pct)
    order_target_percent(context.x, x_target_pct)
    record(stock_Y_pct=y_target_pct, stock_X_pct=x_target_pct)


def is_market_close(dt):
    """Return whether ``dt`` equals the recorded market close of its session.

    ``dt`` is normalised with ``tradingcalendar.canonicalize_datetime`` and
    the result is used as the key into ``tradingcalendar.open_and_closes``;
    the ``'market_close'`` entry found there is compared with ``dt``.
    """
    session_key = tradingcalendar.canonicalize_datetime(dt)
    session_times = tradingcalendar.open_and_closes.T[session_key]
    closing_time = session_times['market_close']
    return dt == closing_time

def hedge_ratio(y, x, add_const=True):
    """Estimate the hedge ratio of ``y`` against ``x`` by ordinary least squares.

    Args:
        y: Dependent price series.
        x: Regressor price series.
        add_const: When True (default), fit an intercept as well and return
            only the coefficient on the first regressor. When False, fit
            without an intercept and return every coefficient.

    Returns:
        The coefficient at position 1 of the fit (the slope following the
        intercept) when ``add_const`` is True, otherwise an array holding
        all fitted coefficients.

    Raises:
        ValueError: If ``add_const`` is True and ``x`` is already constant,
            so that no separate intercept can be added. Without this check
            the intercept would be skipped silently and the slope lookup
            would fail with an IndexError instead.
    """
    if add_const:
        design = sm.add_constant(x, has_constant='raise')
        model = sm.OLS(y, design).fit()
        # params is label-indexed for pandas inputs; convert to an array so
        # the slope is always taken by position.
        return np.asarray(model.params)[1]
    model = sm.OLS(y, x).fit()
    # np.asarray handles both pandas and plain-array parameter containers.
    return np.asarray(model.params)

def compute_holdings_pct(y_shares, x_shares, y_price, x_price):
    """Convert signed share counts of the two legs into portfolio weights.

    Each leg's dollar value is divided by the total absolute dollar value of
    both legs, so the absolute weights sum to 1 and each weight keeps the
    sign of its share count (negative means short).

    Args:
        y_shares: Signed number of shares of the y leg.
        x_shares: Signed number of shares of the x leg.
        y_price: Price per share of the y leg.
        x_price: Price per share of the x leg.

    Returns:
        A ``(y_target_pct, x_target_pct)`` tuple. ``(0.0, 0.0)`` is returned
        when both legs have zero dollar value, since no weights can be
        derived in that case.
    """
    y_dollars = y_shares * y_price
    x_dollars = x_shares * x_price
    notional_dollars = abs(y_dollars) + abs(x_dollars)
    if notional_dollars == 0:
        # Avoid dividing by zero (or producing nan weights) on an empty pair.
        return (0.0, 0.0)
    y_target_pct = y_dollars / notional_dollars
    x_target_pct = x_dollars / notional_dollars
    return (y_target_pct, x_target_pct)




Specifying to only trade 5 (rather than 30) minutes before the market closes gives higher returns



Changing hedge ratio lag to 1 day generates returns of almost 50% over the year starting 2014, compared to the benchmark of 15%. 


Lookback period with hedge ratio lag of 1 day:

15 days - 38.7%
20 days - 49.5%
25 days - 18.2%

JP Morgan & Bank of America Pair Trading Strategy: 




Avon and Estee Lauder Pair Trading Strategy: 


No comments:

Post a Comment