"""
Created on Sat Jan-03-2015
@author: Deniz Turan (http://denizstij.blogspot.co.uk/)
"""
import numpy as np
import statsmodels.api as sm
from scipy.interpolate import interp1d
def kpssTest(x, regression="LEVEL",lshort = True):
"""
KPSS Test for Stationarity
Computes the Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test for the null hypothesis that x is level or trend stationary.
Parameters
----------
x : array_like, 1d
data series
regression : str {'LEVEL','TREND'}
Indicates the null hypothesis and must be one of "Level" (default) or "Trend".
lshort : bool
a logical indicating whether the short or long version of the truncation lag parameter is used.
Returns
-------
stat : float
Test statistic
pvalue : float
the p-value of the test.
usedlag : int
Number of lags used.
Notes
-----
Based on kpss.test function of tseries libraries in R.
To estimate sigma^2 the Newey-West estimator is used. If lshort is TRUE, then the truncation lag parameter is set to trunc(3*sqrt(n)/13), otherwise trunc(10*sqrt(n)/14) is used. The p-values are interpolated from Table 1 of Kwiatkowski et al. (1992). If the computed statistic is outside the table of critical values, then a warning message is generated.
Missing values are not handled.
References
----------
D. Kwiatkowski, P. C. B. Phillips, P. Schmidt, and Y. Shin (1992): Testing the Null Hypothesis of Stationarity against the Alternative of a Unit Root. Journal of Econometrics 54, 159--178.
Examples
--------
x=numpy.random.randn(1000) # is level stationary
kpssTest(x)
y=numpy.cumsum(x) # has unit root
kpssTest(y)
z=x+0.3*arange(1,len(x)+1) # is trend stationary
kpssTest(z,"TREND")
"""
x = np.asarray(x,float)
if len(x.shape)>1:
raise ValueError("x is not an array or univariate time series")
if regression not in ["LEVEL", "TREND"]:
raise ValueError(("regression option %s not understood") % regression)
n = x.shape[0]
if regression=="TREND":
t=range(1,n+1)
t=sm.add_constant(t)
res=sm.OLS(x,t).fit()
e=res.resid
table=[0.216, 0.176, 0.146, 0.119]
else:
t=np.ones(n)
res=sm.OLS(x,t).fit()
e=res.resid
table=[0.739, 0.574, 0.463, 0.347]
tablep=[0.01, 0.025, 0.05, 0.10]
s=np.cumsum(e)
eta=np.sum(np.power(s,2))/(np.power(n,2))
s2 = np.sum(np.power(e,2))/n
if lshort:
l=np.trunc(3*np.sqrt(n)/13)
else:
l=np.trunc(10*np.sqrt(n)/14)
usedlag =int(l)
s2=R_pp_sum(e,len(e),usedlag ,s2)
stat=eta/s2
pvalue , msg=approx(table, tablep, stat)
print "KPSS Test for ",regression," Stationarity\n"
print ("KPSS %s=%f" % (regression, stat))
print ("Truncation lag parameter=%d"% usedlag )
print ("p-value=%f"%pvalue )
if msg is not None:
print "\nWarning:",msg
return ( stat,pvalue , usedlag )
def R_pp_sum (u, n, l, s):
tmp1 = 0.0
for i in range(1,l+1):
tmp2 = 0.0
for j in range(i,n):
tmp2 += u[j]*u[j-i]
tmp2 = tmp2*(1.0-(float(i)/((float(l)+1.0))))
tmp1 = tmp1+tmp2
tmp1 = tmp1/float(n)
tmp1 = tmp1*2.0
return s + tmp1
def approx(x,y,v):
if (v>x[0]):
return (y[0],"p-value smaller than printed p-value")
if (v
Saturday, January 03, 2015
Stationarity Test with KPSS (Kwiatkowski–Phillips–Schmidt–Shin) in Python
In addition to augmented Dickey–Fuller test (ADF), KPSS (Kwiatkowski–Phillips–Schmidt–Shin) is widely used to verify stationarity of a signal. Python statsmodels contains ADF test, but I could not find any implementation of KPSS in python. Therefore, I converted R implementation of kpss.test in tseries library to python as follows (Click here to download the source code) :
Subscribe to:
Post Comments (Atom)
3 comments:
Thank you! You should submit this to statsmodels.
I had to change the string handling in approx(). I am guessing this was done in python3 (I am using python2).
Very helpful. Thank you for this. Ricardo, if you don't mind, what change did you make to the approx function to make it compatible with Python 2?
Hi Michael, sorry, I just tested, and it does work fine in Python 2! Not sure what I was thinking hehe...
Post a Comment