# Mean reversion Spread Trading with Linear Regression # # Deniz Turan, (denizstij AT gmail DOT com), 19-Jan-2014 import numpy as np from scipy.stats import linregress R_P = 1 # refresh period in days W_L = 30 # window length in days def initialize(context): context.y=sid(14517) # EWC context.x=sid(14516) # EWA # for long and shorting context.max_notional = 1000000 context.min_notional = -1000000.0 # set a fixed slippage set_slippage(slippage.FixedSlippage(spread=0.01)) context.long=False; context.short=False; def handle_data(context, data): xpx=data[context.x].price ypx=data[context.y].price retVal=linearRegression(data,context) # lets dont do anything if we dont have enough data yet if retVal is None: return None hedgeRatio,intercept=retVal; spread=ypx-hedgeRatio*xpx data[context.y]['spread'] = spread record(ypx=ypx,spread=spread,xpx=xpx) # find moving average rVal=getMeanStd(data, context) # lets dont do anything if we dont have enough data yet if rVal is None: return meanSpread,stdSpread = rVal # zScore is the number of unit zScore=(spread-meanSpread)/stdSpread; QTY=1000 qtyX=-hedgeRatio*QTY*xpx; qtyY=QTY*ypx; entryZscore=1; exitZscore=0; if zScore < -entryZscore and canEnterLong(context): # enter long the spread order(context.y, qtyY) order(context.x, qtyX) context.long=True context.short=False if zScore > entryZscore and canEnterShort(context): # enter short the spread order(context.y, -qtyY) order(context.x, -qtyX) context.short=True context.long=False record(cash=context.portfolio.cash, stock=context.portfolio.positions_value) @batch_transform(window_length=W_L, refresh_period=R_P) def linearRegression(datapanel, context): xpx = datapanel['price'][context.x] ypx = datapanel['price'][context.y] beta, intercept, r, p, stderr = linregress(ypx, xpx) # record(beta=beta, intercept=intercept) return (beta, intercept) @batch_transform(window_length=W_L, refresh_period=R_P) def getMeanStd(datapanel, context): spread = datapanel['spread'][context.y] meanSpread=spread.mean() stdSpread=spread.std() if meanSpread is not None and stdSpread is not None : return (meanSpread, stdSpread) else: return None def canEnterLong(context): notional=context.portfolio.positions_value if notional < context.max_notional and not context.long: # and not context.short: return True else: return False def canEnterShort(context): notional=context.portfolio.positions_value if notional > context.max_notional and not context.short: #and not context.short: return True else: return False
Showing posts with label Python. Show all posts
Showing posts with label Python. Show all posts
Sunday, January 19, 2014
Mean reversion with Linear Regression and Bollinger Band for Spread Trading within Python
Following code demonstrates how to utilize to linear regression to estimate hedge ratio and Bollinger band for spread trading. The code can be back tested at Quantopian.com
Mean reversion with Kalman Filter as Dynamic Linear Regression for Spread Trading within Python
Following code demonstrates how to utilize to kalman filter to estimate hedge ratio for spread trading. The code can be back tested at Quantopian.com
# Mean reversion with Kalman Filter as Dynamic Linear Regression # # Following algorithm trades based on mean reversion logic of spread # between cointegrated securities by using Kalman Filter as # Dynamic Linear Regression. Kalman filter is used here to estimate hedge (beta) # # Kalman Filter structure # # - measurement equation (linear regression): # y= beta*x+err # err is a guassian noise # # - Prediction model: # beta(t) = beta(t-1) + w(t-1) # w is a guassian noise # Beta is here our hedge unit. # # - Prediction section # beta_hat(t|t-1)=beta_hat(t-1|t-1) # beta_hat is expected value of beta # P(t|t-1)=P(t-1|t-1) + V_w # prediction error, which is cov(beta-beta_hat) # y_hat(t)=beta_hat(t|t-1)*x(t) # measurement prediction # err(t)=y(t)-y_hat(t) # forecast error # Q(t)=x(t)'*P(t|t-1)*x(t) + V_e # variance of forecast error, var(err(t)) # # - Update section # K(t)=R(t|t-1)*x(t)/Q(t) # Kalman filter between 0 and 1 # beta_hat(t|t)=beta_hat(t|t-1)+ K*err(t) # State update # P(t|t)=P(t|t-1)(1-K*x(t)) # State covariance update # # Deniz Turan, (denizstij AT gmail DOT com), 19-Jan-2014 # import numpy as np # Initialization logic def initialize(context): context.x=sid(14517) # EWC context.y=sid(14516) # EWA # for long and shorting context.max_notional = 1000000 context.min_notional = -1000000.0 # set a fixed slippage set_slippage(slippage.FixedSlippage(spread=0.01)) # between 0 and 1 where 1 means fastes change in beta, #whereas small values indicates liniar regression delta = 0.0001 context.Vw=delta/(1-delta)*np.eye(2); # default peridiction error variance context.Ve=0.001; # beta, holds slope and intersection context.beta=np.zeros((2,1)); context.postBeta=np.zeros((2,1)); # previous beta # covariance of error between projected beta and beta # cov (beta-priorBeta) = E[(beta-priorBeta)(beta-priorBeta)'] context.P=np.zeros((2,2)); context.priorP=np.ones((2,2)); context.started=False; context.warmupPeriod=3 context.warmupCount=0 context.long=False; context.short=False; # Will be called on every trade event for the securities specified. def handle_data(context, data): ########################################## # Prediction ########################################## if context.started: # state prediction context.beta=context.postBeta; #prior P prediction context.priorP=context.P+context.Vw else: context.started=True; xpx=np.mat([[1,data[context.x].price]]) ypx=data[context.y].price # projected y yhat=np.dot(xpx,context.beta)[0,0] # prediction error err=(ypx-yhat); # variance of err, var(err) Q=(np.dot(np.dot(xpx,context.priorP),xpx.T)+context.Ve)[0,0] # Kalman gain K=(np.dot(context.priorP,xpx.T)/Q)[0,0] ########################################## # Update section ########################################## context.postBeta=context.beta + np.dot(K,err) context.warmupCount+=1 if context.warmupPeriod > context.warmupCount: return #order(sid(24), 50) message='started: {st}, xprice: {xpx}, yprice: {ypx},\ yhat:{yhat} beta: {b}, postBeta: {pBeta} err: {e}, Q: {Q}, K: {K}' message= message.format(st=context.started,xpx=xpx,ypx=ypx,\ yhat=yhat, b=context.beta, \ pBeta=context.postBeta, e=err, Q=Q, K=K) log.info(message) # record(xpx=data[context.x].price, ypx=data[context.y].price,err=err, yhat=yhat, beta=context.beta[1,0]) ########################################## # Trading section # Spread (y-beta*x) is traded ########################################## QTY=1000 qtyX=-context.beta[1,0]*xpx[0,1]*QTY; qtyY=ypx*QTY; # similar to zscore in bollinger band stdQ=np.sqrt(Q) if err < -stdQ and canEnterLong(context): # enter long the spread order(context.y, qtyY) order(context.x, qtyX) context.long=True if err > -stdQ and canExitLong(context): # exit long the spread order(context.y, -qtyY) order(context.x, -qtyX) context.long=False if err > stdQ and canEnterShort(context): # enter short the spread order(context.y, -qtyY) order(context.x, -qtyX) context.short=True if err < stdQ and canExitShort(context): # exit short the spread order(context.y,qtyY) order(context.x,qtyX) context.short=False record(cash=context.portfolio.cash, stock=context.portfolio.positions_value) def canEnterLong(context): notional=context.portfolio.positions_value if notional < context.max_notional \ and not context.long and not context.short: return True else: return False def canExitLong(context): if context.long and not context.short: return True else: return False def canEnterShort(context): notional=context.portfolio.positions_value if notional > context.max_notional \ and not context.long and not context.short: return True else: return False def canExitShort(context): if context.short and not context.long: return True else: return False
Sunday, December 29, 2013
Price Spread based Mean Reversion Strategy within R and Python
Below piece of code within R and Python show how to apply basic mean reversion strategy based on price spread (also log price spread) for Gold and USD Oil ETFs.
# # R code # # load price data of Gold and Usd Oil ETF g=read.csv("gold.csv", header=F) o=read.csv("uso.csv", header=F) # one month window length wLen=22 len=dim(g)[1] hedgeRatio=matrix(rep(0,len),len) # to verify if spread is stationary adfResP=0 # flag to enable log price isLogPrice=0 for (t in wLen:len){ g_w=g[(t-wLen+1):t,1] o_w=o[(t-wLen+1):t,1] if (isLogPrice==1){ g_w=log(g_w) o_w=log(o_w) } # linear regression reg=lm(o_w~g_w) # get hedge ratio hedgeRatio[t]=reg$coefficients[2]; # verify if spread (residual) is stationary adfRes=adf.test(reg$residuals, alternative='stationary') # sum of p values adfResP=adfResP+adfRes$p.value } # estimate mean p value avgPValue=adfResP/(len-wLen) # > 0.5261476 # as avg p value (0.5261476) indicates, actually, spread is not stationary, so strategy wont make much return. portf=cbind(g,o) sportf=portf if (isLogPrice==1){ sportf=log(portf) } # estimate spread of portfolio = oil - headgeRatio*gold spread=matrix(rowSums(cbind(-1*hedgeRatio,1)*sportf)) plot(spread[,1],type='l') # trim N/A sections start=wLen+1 hedgeRatio=hedgeRatio[start:len,1] portf=portf[start:len,1:2] spread=matrix(spread[start:len,1]) # negative Z score will be used as number of shares # runmean and runsd are in caTools package meanSpread=runmean(spread,wLen,endrule="constant") stdSpread=runsd(spread,wLen,endrule="constant") numUnits=-(spread-meanSpread)/stdSpread # positions=cbind(numUnits,numUnits)*cbind(-1*hedgeRatio,1)*portf # daily profit and loss lagPortf=lags(portf,1)[,3:4] lagPos=lags(positions,1)[,3:4] pnl=rowSums(lagPos*(portf-lagPortf)/lagPortf); # return is P&L divided by gross market value of portfolio ret=tail(pnl,-1)/rowSums(abs(lagPos)) plot(cumprod(1+ret)-1,type='l') # annual percentage rate APR=prod(1+ret)^(252/length(ret)) # > 1.032342 sharpRatio=sqrt(252)*mean(ret)/stdev(ret) # > 0.3713589
''' Python code Created on 29 Dec 2013 @author: deniz turan (denizstij@gmail.com) ''' import numpy as np import pandas as pd from scipy.stats import linregress o=pd.read_csv("uso.csv",header=0,names=["price"]) g=pd.read_csv("gold.csv",header=0,names=["price"]) len=o.price.count() wLen=22 hedgeRatio= np.zeros((len,2)) for t in range(wLen, len): o_w=o.price[t-wLen:t] g_w=g.price[t-wLen:t] slope, intercept, r, p, stderr = linregress(g_w, o_w) hedgeRatio[t,0]=slope*-1 hedgeRatio[t,1]=1 portf=np.vstack((g.price,o.price)).T # spread spread=np.sum(np.multiply(portf,hedgeRatio),1) # negative Z score will be used as number of shares meanSpread=pd.rolling_mean(spread,wLen); stdSpread=pd.rolling_std(spread,wLen); numUnits=-(spread-meanSpread)/stdSpread # #drop NaN values start=wLen g=g.drop(g.index[:start]) o=o.drop(o.index[:start]) hedgeRatio=hedgeRatio[start:,] portf=portf[start:,] spread=spread[start:,] # number of units numUnits=numUnits[start:,] # position positions=np.multiply(np.vstack((numUnits,numUnits)).T,np.multiply(portf,hedgeRatio)) # get lag 1 lagPortf=np.roll(portf,1,0); lagPortf[0,]=lagPortf[1,]; lagPos=np.roll(positions,1,0); lagPos[0,]=lagPos[1,]; spread=np.sum(np.multiply(portf,hedgeRatio),1) pnl=np.sum(np.divide(np.multiply(lagPos,(portf-lagPortf)),lagPortf),1) # return ret=np.divide(pnl,np.sum(np.abs(lagPos),1)) APR=np.power(np.prod(1+ret),(252/float(np.size(ret,0)))) sharpRatio=np.sqrt(252)*float(np.mean(ret))/float(np.std(ret)) print " APR %f, sharpeRatio=%f" %( APR,sharpRatio)Although, p-value for ADF test, APR (annual percentage rate) and sharpe ratio indicate, this strategy is not profitable, it is very basic strategy to apply.
Labels:
ADF,
Algorithmic Trading,
finance,
Linear Regression,
Mean Reversion Strategies,
Python,
R
Friday, November 22, 2013
Java Garbage Collection Log File Scraper
''' Created by Deniz Turan Oracle Java (tested on 7), young generation garbage collection activity scraper. Extracts following fields from GC log file and save to a csv file. Count,LogTime,logGCOffsetTime,logGCOffsetTime2, YGPreSize,YGPostSize,YGTotalSize, YGElapsedTime, # Young generation OLDPreSize,OLDPostSize,OLDTotalSize,OLDElapsedTime # Old generation ''' from subprocess import call import glob import os logDir="C:\\temp\\gc\\" finalResultFileName=logDir+"finalResults.csv" filterExtension="*.log"; def getLogFileList(search_dir): files = filter(os.path.isfile, glob.glob(search_dir + filterExtension)) files.sort(key=lambda x: os.path.getmtime(x)) return files def openResultFile(): print "Creating result file : %s"% (finalResultFileName) # remove previous file call("rm "+finalResultFileName,shell=True) resultFileFD = open( finalResultFileName ,"a") ## create header resultFileFD.write("Count,LogTime,logGCOffsetTime,logGCOffsetTime2,") resultFileFD.write("YGPreSize,YGPostSize,YGTotalSize, YGElapsedTime,") resultFileFD.write("OLDPreSize,OLDPostSize,OLDTotalSize,OLDElapsedTime\n") return resultFileFD def closeResultFile(resultFileFD): print "Closing result file " resultFileFD.close(); def getFieldValue(strVal): index=strVal.index("K") index2=strVal.index("K", index+1) index3=strVal.index("K", index2+1) part1=strVal[:index] part2=strVal[index+3:index2] part3=strVal[index2+2:index3] return (part1,part2,part3) ##################################################### # Main ##################################################### if __name__ == '__main__': # prepare result file resultFileFD=openResultFile () print "Started to process log files" logFileList=getLogFileList(logDir) count=0 for f in logFileList: print "Processing GC Log file %s"%f logFD = open(f) line = logFD.readline() while (line != "" ): if "ParNew" in line : count=count+1 fields=line.split(" ") logTime=fields[0] logGCOffsetTime=fields[1] logGCOffsetTime2=fields[3] res=getFieldValue(fields[5]) YGPreSize,YGPostSize,YGTotalSize=res YGElapsedTime=fields[6] res=getFieldValue(fields[8]) OLDPreSize,OLDPostSize,OLDTotalSize=res OLDElapsedTime=fields[9] print line print "%d %s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n"%(count,logTime,logGCOffsetTime,logGCOffsetTime2, \ YGPreSize,YGPostSize,YGTotalSize, YGElapsedTime,\ OLDPreSize,OLDPostSize,OLDTotalSize,OLDElapsedTime) # print to file as CSV now resultFileFD.write("%d,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n"%(count,logTime,logGCOffsetTime,logGCOffsetTime2, \ YGPreSize,YGPostSize,YGTotalSize, YGElapsedTime,\ OLDPreSize,OLDPostSize,OLDTotalSize,OLDElapsedTime)) line = logFD.readline() continue logFD.close(); closeResultFile(resultFileFD); print "finished processing log files" pass
Labels:
Garbage Collection Log File,
java,
Python
Monday, November 18, 2013
Simple Passive Momentum Trading with Bollinger Band
Below, you can see a simple trading algorithm based on momentum and bollinger band on Quantopian.com
# Simple Passive Momentum Trading with Bollinger Band import numpy as np import statsmodels.api as stat import statsmodels.tsa.stattools as ts # globals for batch transform decorator R_P = 1 # refresh period in days W_L = 30 # window length in days lookback=22 def initialize(context): context.stock = sid(24) # Apple (ignoring look-ahead bias) # for long and shorting context.max_notional = 1000000 context.min_notional = -1000000.0 # set a fixed slippage set_slippage(slippage.FixedSlippage(spread=0.01)) def handle_data(context, data): # find moving average rVal=getMeanStd(data) # lets dont do anything if we dont have enough data yet if rVal is None: return meanPrice,stdPrice = rVal price=data[context.stock].price notional = context.portfolio.positions[context.stock].amount * price # Passive momentum trading where for trading signal, Z-score is estimated h=((price-meanPrice)/stdPrice) # Bollinger band, if price is out of 2 std of moving mean, than lets trade if h>2 and notional < context.max_notional : # long order(context.stock,h*1000) if h<-2 and notional > context.min_notional: # short order(context.stock,h*1000) @batch_transform(window_length=W_L, refresh_period=R_P) def getMeanStd(datapanel): prices = datapanel['price'] meanPrice=prices.mean() stdPrice=prices.std() if meanPrice is not None and stdPrice is not None : return (meanPrice, stdPrice) else: return NoneScreen shot of the back testing result is: Click here to run algorithm on Quantopian.com.
Labels:
Algorithmic Trading,
Bollinger Band,
finance,
Momentum Trading,
Python,
Trading
Subscribe to:
Posts (Atom)