Finance by M&M : Quantopian Algorithmic Trading Strategy: Statistical Arbitrage (Pairs Trading)

What is Statistical Arbitrage?

Statistical arbitrage refers to strategies in which traders aim to profit from a mispricing between related securities. It is based on mean reversion: the spread between the two stocks should be roughly constant, so any deviation away from that constant spread is a trading opportunity, because the spread is expected to revert to its mean. This means the two stocks we choose to focus on should be in the same sector, as this takes away market-wide influences and gives us a fundamental reason for believing the spread is mean reverting or constant. Mathematically, a cointegration test checks whether two stocks work well in a pairs trade: the pair is cointegrated when the error term (residual) of a regression of one price on the other is stationary. This is also why cointegration is important in time series analysis. A stationary relationship means the hedge ratio remains constant.

import numpy as np
import statsmodels.api as sm
import pandas as pd
from zipline.utils import tradingcalendar
import pytz

def initialize(context):
    """Configure the backtest's cost models and choose the pair to trade.

    Args:
        context: Algorithm state object. The two securities of the pair are
            stored on it as ``context.y`` and ``context.x``.
    """
    # Register the slippage model first, then the commission model.
    slippage_model = slippage.VolumeShareSlippage(volume_limit=0.025,
                                                  price_impact=0.1)
    set_slippage(slippage_model)

    commission_model = commission.PerShare(cost=0.0075, min_trade_cost=0.0)
    set_commission(commission_model)

    # handle_data regresses the prices of y on the prices of x.
    context.y = symbol('ANF')  # Abercrombie & Fitch
    context.x = symbol('AEO')  # American Eagle Outfitters

The next settings define whether to trade on a lagged hedge ratio (here, one calculated with a lag of 2 days) instead of the most recent one. Because we are trading mean reversion, a lagged hedge ratio excludes the last 2 days of price action — for example, the large divergences the algorithm hopes to exploit — so that it stays more closely aligned with the historical economic relationship of the stock pair. A hedge ratio compares the value of the part of a position that is protected by a hedge with the size of the entire position. For example, an investor holding $1000 of foreign shares is exposed to currency risk; if $500 of those shares is hedged with a currency position, the hedge ratio is 0.5, i.e. 50% of the equity position is protected from currency risk.

# Strategy settings and running state stored on `context`.

# When True, trade on a lagged hedge ratio instead of the latest estimate.
context.use_hedge_ratio_lag = True
# Used as a negative index into the hedge-ratio history (history[-lag]):
# 1 selects the newest estimate (the one that includes the most recent
# price), 2 selects the estimate recorded just before it.
context.hedge_ratio_lag = 2 if context.use_hedge_ratio_lag else 1

context.lookback = 20  # rows of price history used for the regression
context.z_window = 20  # spreads used for the z-score; must be <= lookback
context.entry_z = 1.0  # enter a trade once the z-score is beyond +/- entry_z
context.exit_z = 0.0  # exit a trade once the z-score crosses exit_z

# Running histories; handle_data appends one value to each per evaluation.
context.hedge_ratio_history = np.array([])
context.spread = np.array([])

# Position flags for the spread trade; both False means flat.
context.in_long = False
context.in_short = False

For every trade event...

def handle_data(context, data):
    """Evaluate the pair once a day at 15:30 US/Eastern and trade the spread.

    Steps: re-estimate the hedge ratio from the last ``context.lookback``
    price rows, append today's spread ``y - hedge * x`` to the running
    history, compute the z-score of the latest spread over the last
    ``context.z_window`` spreads, then exit or enter positions based on
    ``context.exit_z`` / ``context.entry_z``.

    Args:
        context: Algorithm state populated by ``initialize``.
        data: Bar data supplied by the platform; not used by this function.
    """
    # Do nothing while earlier orders are still open.
    if get_open_orders():
        return

    now = get_datetime()
    exchange_time = now.astimezone(pytz.timezone('US/Eastern'))

    # Only trade 30 minutes before the regular market close.
    if not (exchange_time.hour == 15 and exchange_time.minute == 30):
        return

    prices = history(35, '1d', 'price').iloc[-context.lookback:]

    y = prices[context.y]
    x = prices[context.x]

    try:
        hedge = hedge_ratio(y, x, add_const=True)
    except ValueError as e:
        log.debug(e)
        return

    context.hedge_ratio_history = np.append(context.hedge_ratio_history, hedge)
    # Wait until enough ratios are recorded to look back `hedge_ratio_lag`.
    if context.hedge_ratio_history.size < context.hedge_ratio_lag:
        return
    # Trade on the (possibly lagged) hedge ratio rather than today's estimate.
    hedge = context.hedge_ratio_history[-context.hedge_ratio_lag]

    # `.iloc[-1]` always means "last row", whatever the index type is; a bare
    # `[-1]` depends on pandas' deprecated positional fallback.
    y_price = y.iloc[-1]
    x_price = x.iloc[-1]
    context.spread = np.append(context.spread, y_price - hedge * x_price)

    # Need more than z_window spreads before the z-score is evaluated.
    if context.spread.size <= context.z_window:
        return

    # Keep only the z-score lookback period.
    spreads = context.spread[-context.z_window:]
    spread_std = spreads.std()
    # A zero (or NaN) deviation makes the z-score undefined; skip this bar
    # instead of dividing by zero. No trade would have triggered anyway.
    if not spread_std > 0:
        return
    zscore = (spreads[-1] - spreads.mean()) / spread_std

    # Exit: the spread has reverted through exit_z from the side we hold.
    exit_short = context.in_short and zscore < context.exit_z
    exit_long = context.in_long and zscore > context.exit_z
    if exit_short or exit_long:
        order_target(context.y, 0)
        order_target(context.x, 0)
        context.in_short = False
        context.in_long = False
        record(stock_Y_pct=0, stock_X_pct=0)
        return

    # Entry: only open a side we are not already holding.
    if zscore < -context.entry_z and not context.in_long:
        direction = 1  # long the spread: +1 share of y against -hedge of x
    elif zscore > context.entry_z and not context.in_short:
        direction = -1  # short the spread: -1 share of y against +hedge of x
    else:
        return

    context.in_long = direction > 0
    context.in_short = direction < 0

    y_target_pct, x_target_pct = compute_holdings_pct(
        direction, -direction * hedge, y_price, x_price)
    order_target_percent(context.y, y_target_pct)
    order_target_percent(context.x, x_target_pct)
    record(stock_Y_pct=y_target_pct, stock_X_pct=x_target_pct)

def is_market_close(dt):
    """Return whether ``dt`` equals the calendar's market close for its day.

    Args:
        dt: Datetime to test.

    Returns:
        Result of comparing ``dt`` with the ``'market_close'`` entry that
        ``tradingcalendar.open_and_closes`` holds for the key produced by
        ``tradingcalendar.canonicalize_datetime(dt)``.
    """
    session_key = tradingcalendar.canonicalize_datetime(dt)
    # Transposed so that each session becomes a column.
    schedule_by_session = tradingcalendar.open_and_closes.T
    closing_time = schedule_by_session[session_key]['market_close']
    return dt == closing_time

def hedge_ratio(y, x, add_const=True):
    """Estimate the hedge ratio of ``y`` against ``x`` by ordinary least squares.

    Args:
        y: Dependent price series.
        x: Regressor price series.
        add_const: When true, fit an intercept as well and return only the
            slope on ``x``; when false, regress without an intercept.

    Returns:
        The slope as a scalar when ``add_const`` is true, otherwise the array
        of fitted coefficients.

    Raises:
        ValueError: If ``add_const`` is true but the fit produced fewer than
            two coefficients (statsmodels does not add an intercept column
            when the regressor is already constant).
    """
    design = sm.add_constant(x) if add_const else x
    # Convert to a plain array so the positional lookups below work the same
    # for pandas and numpy inputs, without label-based indexing on a Series.
    coefficients = np.asarray(sm.OLS(y, design).fit().params)
    if not add_const:
        return coefficients
    if coefficients.size < 2:
        raise ValueError(
            'hedge_ratio: no slope coefficient was fitted '
            '(regressor appears to be constant)')
    # Index 0 is the prepended intercept; index 1 is the slope on x.
    return coefficients[1]

def compute_holdings_pct(y_shares, x_shares, y_price, x_price):
    """Convert share counts for the two legs into signed portfolio weights.

    Each leg's dollar value is divided by the gross dollar value of both
    legs (the sum of their absolute values), so the absolute weights sum to
    1 and each weight keeps the sign of its leg's dollar value.

    Args:
        y_shares: Signed number of shares of the y leg.
        x_shares: Signed number of shares of the x leg.
        y_price: Price per share of the y leg.
        x_price: Price per share of the x leg.

    Returns:
        Tuple ``(y_target_pct, x_target_pct)``. ``(0.0, 0.0)`` when both legs
        have zero dollar value, since there is nothing to allocate.
    """
    y_dollars = y_shares * y_price
    x_dollars = x_shares * x_price
    notional_dollars = abs(y_dollars) + abs(x_dollars)
    if notional_dollars == 0:
        # Avoid dividing by zero: no exposure on either leg means no target.
        return (0.0, 0.0)
    return (y_dollars / notional_dollars, x_dollars / notional_dollars)