Below piece of code within R and Python show how to apply basic mean reversion strategy based on price spread (also log price spread) for Gold and USD Oil ETFs.
#
# R code
#
# load price data of Gold and Usd Oil ETF
g=read.csv("gold.csv", header=F)
o=read.csv("uso.csv", header=F)
# one month window length
wLen=22
len=dim(g)[1]
hedgeRatio=matrix(rep(0,len),len)
# to verify if spread is stationary
adfResP=0
# flag to enable log price
isLogPrice=0
for (t in wLen:len){
g_w=g[(t-wLen+1):t,1]
o_w=o[(t-wLen+1):t,1]
if (isLogPrice==1){
g_w=log(g_w)
o_w=log(o_w)
}
# linear regression
reg=lm(o_w~g_w)
# get hedge ratio
hedgeRatio[t]=reg$coefficients[2];
# verify if spread (residual) is stationary
adfRes=adf.test(reg$residuals, alternative='stationary')
# sum of p values
adfResP=adfResP+adfRes$p.value
}
# estimate mean p value
avgPValue=adfResP/(len-wLen)
# > 0.5261476
# as avg p value (0.5261476) indicates, actually, spread is not stationary, so strategy wont make much return.
portf=cbind(g,o)
sportf=portf
if (isLogPrice==1){
sportf=log(portf)
}
# estimate spread of portfolio = oil - headgeRatio*gold
spread=matrix(rowSums(cbind(-1*hedgeRatio,1)*sportf))
plot(spread[,1],type='l')
# trim N/A sections
start=wLen+1
hedgeRatio=hedgeRatio[start:len,1]
portf=portf[start:len,1:2]
spread=matrix(spread[start:len,1])
# negative Z score will be used as number of shares
# runmean and runsd are in caTools package
meanSpread=runmean(spread,wLen,endrule="constant")
stdSpread=runsd(spread,wLen,endrule="constant")
numUnits=-(spread-meanSpread)/stdSpread #
positions=cbind(numUnits,numUnits)*cbind(-1*hedgeRatio,1)*portf
# daily profit and loss
lagPortf=lags(portf,1)[,3:4]
lagPos=lags(positions,1)[,3:4]
pnl=rowSums(lagPos*(portf-lagPortf)/lagPortf);
# return is P&L divided by gross market value of portfolio
ret=tail(pnl,-1)/rowSums(abs(lagPos))
plot(cumprod(1+ret)-1,type='l')
# annual percentage rate
APR=prod(1+ret)^(252/length(ret))
# > 1.032342
sharpRatio=sqrt(252)*mean(ret)/stdev(ret)
# > 0.3713589
'''
Python code
Created on 29 Dec 2013
@author: deniz turan (denizstij@gmail.com)
'''
import numpy as np
import pandas as pd
from scipy.stats import linregress
o=pd.read_csv("uso.csv",header=0,names=["price"])
g=pd.read_csv("gold.csv",header=0,names=["price"])
len=o.price.count()
wLen=22
hedgeRatio= np.zeros((len,2))
for t in range(wLen, len):
o_w=o.price[t-wLen:t]
g_w=g.price[t-wLen:t]
slope, intercept, r, p, stderr = linregress(g_w, o_w)
hedgeRatio[t,0]=slope*-1
hedgeRatio[t,1]=1
portf=np.vstack((g.price,o.price)).T
# spread
spread=np.sum(np.multiply(portf,hedgeRatio),1)
# negative Z score will be used as number of shares
meanSpread=pd.rolling_mean(spread,wLen);
stdSpread=pd.rolling_std(spread,wLen);
numUnits=-(spread-meanSpread)/stdSpread #
#drop NaN values
start=wLen
g=g.drop(g.index[:start])
o=o.drop(o.index[:start])
hedgeRatio=hedgeRatio[start:,]
portf=portf[start:,]
spread=spread[start:,]
# number of units
numUnits=numUnits[start:,]
# position
positions=np.multiply(np.vstack((numUnits,numUnits)).T,np.multiply(portf,hedgeRatio))
# get lag 1
lagPortf=np.roll(portf,1,0);
lagPortf[0,]=lagPortf[1,];
lagPos=np.roll(positions,1,0);
lagPos[0,]=lagPos[1,];
spread=np.sum(np.multiply(portf,hedgeRatio),1)
pnl=np.sum(np.divide(np.multiply(lagPos,(portf-lagPortf)),lagPortf),1)
# return
ret=np.divide(pnl,np.sum(np.abs(lagPos),1))
APR=np.power(np.prod(1+ret),(252/float(np.size(ret,0))))
sharpRatio=np.sqrt(252)*float(np.mean(ret))/float(np.std(ret))
print " APR %f, sharpeRatio=%f" %( APR,sharpRatio)
Although, p-value for ADF test, APR (annual percentage rate) and sharpe ratio indicate, this strategy is not profitable, it is very basic strategy to apply.
No comments:
Post a Comment