""" Created on Sat Jan-03-2015 @author: Deniz Turan (http://denizstij.blogspot.co.uk/) """ import numpy as np import statsmodels.api as sm from scipy.interpolate import interp1d def kpssTest(x, regression="LEVEL",lshort = True): """ KPSS Test for Stationarity Computes the Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test for the null hypothesis that x is level or trend stationary. Parameters ---------- x : array_like, 1d data series regression : str {'LEVEL','TREND'} Indicates the null hypothesis and must be one of "Level" (default) or "Trend". lshort : bool a logical indicating whether the short or long version of the truncation lag parameter is used. Returns ------- stat : float Test statistic pvalue : float the p-value of the test. usedlag : int Number of lags used. Notes ----- Based on kpss.test function of tseries libraries in R. To estimate sigma^2 the Newey-West estimator is used. If lshort is TRUE, then the truncation lag parameter is set to trunc(3*sqrt(n)/13), otherwise trunc(10*sqrt(n)/14) is used. The p-values are interpolated from Table 1 of Kwiatkowski et al. (1992). If the computed statistic is outside the table of critical values, then a warning message is generated. Missing values are not handled. References ---------- D. Kwiatkowski, P. C. B. Phillips, P. Schmidt, and Y. Shin (1992): Testing the Null Hypothesis of Stationarity against the Alternative of a Unit Root. Journal of Econometrics 54, 159--178. Examples -------- x=numpy.random.randn(1000) # is level stationary kpssTest(x) y=numpy.cumsum(x) # has unit root kpssTest(y) z=x+0.3*arange(1,len(x)+1) # is trend stationary kpssTest(z,"TREND") """ x = np.asarray(x,float) if len(x.shape)>1: raise ValueError("x is not an array or univariate time series") if regression not in ["LEVEL", "TREND"]: raise ValueError(("regression option %s not understood") % regression) n = x.shape[0] if regression=="TREND": t=range(1,n+1) t=sm.add_constant(t) res=sm.OLS(x,t).fit() e=res.resid table=[0.216, 0.176, 0.146, 0.119] else: t=np.ones(n) res=sm.OLS(x,t).fit() e=res.resid table=[0.739, 0.574, 0.463, 0.347] tablep=[0.01, 0.025, 0.05, 0.10] s=np.cumsum(e) eta=np.sum(np.power(s,2))/(np.power(n,2)) s2 = np.sum(np.power(e,2))/n if lshort: l=np.trunc(3*np.sqrt(n)/13) else: l=np.trunc(10*np.sqrt(n)/14) usedlag =int(l) s2=R_pp_sum(e,len(e),usedlag ,s2) stat=eta/s2 pvalue , msg=approx(table, tablep, stat) print "KPSS Test for ",regression," Stationarity\n" print ("KPSS %s=%f" % (regression, stat)) print ("Truncation lag parameter=%d"% usedlag ) print ("p-value=%f"%pvalue ) if msg is not None: print "\nWarning:",msg return ( stat,pvalue , usedlag ) def R_pp_sum (u, n, l, s): tmp1 = 0.0 for i in range(1,l+1): tmp2 = 0.0 for j in range(i,n): tmp2 += u[j]*u[j-i] tmp2 = tmp2*(1.0-(float(i)/((float(l)+1.0)))) tmp1 = tmp1+tmp2 tmp1 = tmp1/float(n) tmp1 = tmp1*2.0 return s + tmp1 def approx(x,y,v): if (v>x[0]): return (y[0],"p-value smaller than printed p-value") if (v
Saturday, January 03, 2015
Stationarity Test with KPSS (Kwiatkowski–Phillips–Schmidt–Shin) in Python
In addition to augmented Dickey–Fuller test (ADF), KPSS (Kwiatkowski–Phillips–Schmidt–Shin) is widely used to verify stationarity of a signal. Python statsmodels contains ADF test, but I could not find any implementation of KPSS in python. Therefore, I converted R implementation of kpss.test in tseries library to python as follows (Click here to download the source code) :
Subscribe to:
Post Comments (Atom)
3 comments:
Thank you! You should submit this to statsmodels.
I had to change the string handling in approx(). I am guessing this was done in python3 (I am using python2).
Very helpful. Thank you for this. Ricardo, if you don't mind, what change did you make to the approx function to make it compatible with Python 2?
Hi Michael, sorry, I just tested, and it does work fine in Python 2! Not sure what I was thinking hehe...
Post a Comment