LAMPSPUC · guilhermebodin · Jun 8, 2021 · Jun 8, 2021 · Jun 8, 2021 · Jun 8, 2021
diff --git a/src/StateSpaceModels.jl b/src/StateSpaceModels.jl
@@ -21,6 +21,7 @@ include("datasets.jl")
 include("hyperparameters.jl")
 include("systems.jl")
 include("kalman_filter_and_smoother.jl")
+include("kpss.jl")
 
 include("filters/univariate_kalman_filter.jl")
 include("filters/multivariate_kalman_filter.jl")
@@ -35,6 +36,7 @@ include("prints.jl")
 include("forecast.jl")
 include("cross_validation.jl")
 
+
 include("models/common.jl")
 include("models/locallevel.jl")
 include("models/locallevelcycle.jl")

diff --git a/src/kpss.jl b/src/kpss.jl
@@ -0,0 +1,83 @@
+function calc_lag_kpss(lags::Bool, n::Int64)
+    return lags ? trunc(Int, 4*(n/100)^0.25) : trunc(Int, 12*(n/100)^0.25)
+end
+
+function σ²_estimator(l::Int64, n::Int64, ϵ::Vector{Fl}) where Fl
+    return ((1/n)*sum(ϵ[t]^2 for t in 1:n)) + ((2/n)*sum((1-(s/(l + 1))) * (sum(ϵ[t]*ϵ[t - s] for t in s + 1:n)) for s in 1:l))
+end
+
+function p_value_weighted_average(η::Float64, p1::Float64, cv1::Float64, 
+                                  p2::Float64, cv2::Float64)
+    return ((p1*(cv1 - cv2)) - ((cv1 - η)*(p1 - p2)))/(cv1 - cv2)
+end
+
+function p_value_from_η(η::Float64, crit_vals::Vector{Float64})
+    if η >= crit_vals[1]
+        p_value = 0.01
+        @warn("p-value smaller than printed p-value")
+    elseif η < crit_vals[1] && η >= crit_vals[2]
+        p_value = p_value_weighted_average(η, 0.01, crit_vals[1], 0.025, crit_vals[2])
+    elseif η < crit_vals[2] && η >= crit_vals[3]
+        p_value = p_value_weighted_average(η, 0.025, crit_vals[2], 0.05, crit_vals[3])
+    elseif η < crit_vals[3] && η <= crit_vals[4]
+        p_value = p_value_weighted_average(η, 0.05, crit_vals[3], 0.1, crit_vals[4])
+    else
+        p_value = 0.1
+        @warn("p-value greater than printed p-value")
+    end
+    return p_value
+end
+
+function KPSSTest_trend(y::Vector{Fl}, lags::Bool) where Fl
+    η = calc_η_trend(y, lags)
+    crit_vals = [0.216, 0.176, 0.146, 0.119]
+    p_value = p_value_from_η(η, crit_vals)
+
+    return p_value
+end
+
+function KPSSTest_level(y::Vector{Fl}, lags::Bool) where Fl
+    η = calc_η_level(y, lags)
+    crit_vals = [0.739, 0.574, 0.463, 0.347]
+    p_value = p_value_from_η(η, crit_vals)
+
+    return p_value
+end
+
+function calc_η_trend(y::Vector{Fl}, lags::Bool) where Fl
+    n     = length(y)
+    zt    = collect(1:n)
+    r     = ones(n)
+    x     = hcat(r,zt)
+    β_est = (x'*x)\x'*y
+    ŷ     = x*β_est
+    ϵ     = y - ŷ
+    l     = calc_lag_kpss(lags, n)
+    η     = (1/n^2)*sum(sum((((sum(ϵ[i] for i in 1:t))^2)/σ²_estimator(l, n, ϵ)) for t in 1:n))
+
+    return η
+end
+
+function calc_η_level(y::Vector{Fl}, lags::Bool) where Fl
+    n     = length(y)
+    ϵ     = y .- (mean(y))
+    l     = calc_lag_kpss(lags, n)
+    η     = (1/n^2)*sum(sum((((sum(ϵ[i] for i in 1:t))^2)/σ²_estimator(l, n, ϵ)) for t in 1:n))
+
+    return η
+end
+
+
+function KPSSTest(y::Vector{Fl}; null::Symbol = :level, lags::Bool = true) where Fl
+    # Denis Kwiatkowski
+    # Peter C.B. Phillips
+    # Peter Schmidt and Yongcheol Shin
+    # "Testing the null hypothesis of stationarity
+    #  against the alternative of a unit root" 
+        #    Biometrika 71 (2) (August 1): 403-404.
+    # Journal of Econometrics 54 (1992) 159-178. North-Holland
+
+    @assert null in [:trend, :level]
+
+    return null == :trend ? KPSSTest_trend(y, lags) : KPSSTest_level(y, lags)
+end
diff --git a/src/models/sarima.jl b/src/models/sarima.jl
@@ -677,10 +677,21 @@ function model_name(model::SARIMA)
     return str
 end
 
-function repeated_kpss_test(y::Vector{Fl}) where Fl
-    # TODO
-    return 0
+function repeated_kpss_test(y::Vector{Fl}, max_d::Int64) where Fl
+    # TODO handle seasonal diff from d
+    seasonal_diff_y = y
+    p_value = KPSSTest(seasonal_diff_y)
+    d = 0
+
+    while p_value <= 0.01 && d < max_d
+       seasonal_diff_y = diff(seasonal_diff_y)
+       p_value = KPSSTest(seasonal_diff_y) 
+       d += 1
+    end
+
+    return d
 end
+
 function canova_hansen_test(y::Vector{Fl}) where Fl
     # TODO
     return 0
@@ -801,7 +812,7 @@ function auto_arima(y::Vector{Fl};
     @assert start_Q >= 0
     @assert information_criteria in ["aic", "aicc", "bic"]
 
-    d = repeated_kpss_test(y)
+    d = repeated_kpss_test(y, max_d)
     D = canova_hansen_test(y)
 
     candidate_models = SARIMA[]