Skip to content

Commit

Permalink
Merge pull request #18 from LAMPSPUC/add_residuals_diagnostics
Browse files Browse the repository at this point in the history
Add residuals diagnostics
  • Loading branch information
matheusaps authored Jul 23, 2024
2 parents 0e07734 + 96e6d5b commit 51798c0
Show file tree
Hide file tree
Showing 14 changed files with 3,344 additions and 3,528 deletions.
5 changes: 5 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,21 @@ authors = ["Matheus Alves <matheus03@alves@gmail.com>"]
version = "0.1.0"

[deps]
ARCHModels = "6d3278bc-c23a-5105-85e5-0d57d2bf684f"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5"
Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
StateSpaceModels = "99342f36-827c-5390-97c9-d7f9ee765c78"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
4,632 changes: 1,971 additions & 2,661 deletions examples/4_residuals_diagnostic_example.ipynb

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions src/UnobservedComponentsGAS.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ module UnobservedComponentsGAS
using Random
using SpecialFunctions
using StateSpaceModels
# For residuals diagnostics
using Plots
using HypothesisTests
using StatsPlots
using StatsBase
using ARCHModels

# include("../NonParametricStructuralModels/src/NonParametricStructuralModels.jl")

Expand All @@ -23,6 +29,7 @@ module UnobservedComponentsGAS
include("components_dynamics.jl")
include("optimization.jl")
include("forecast.jl")
include("residuals_diagnostics.jl")

const DICT_CODE = Dict(1 => "Normal",
2 => "tLocationScale" )
Expand Down
224 changes: 224 additions & 0 deletions src/residuals_diagnostics.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
# Function to retrieve residuals from a fitted model
# Parameters:
# - fitted_model::Output: The model output object containing residuals
# - type::String="q": The type of residuals to retrieve ("q" for quantile, "std" for standardized, "cs" for conditional score)
# Returns:
# - The specified residuals as a vector or matrix
function get_residuals(output::Output; type::String="q")
fitted_model = deepcopy(output)
if type == "q"
return fitted_model.residuals["q_residuals"][2:end]
elseif type == "std"
return fitted_model.residuals["std_residuals"][2:end]
else
return fitted_model.residuals["cs_residuals"][2:end, :]
end
end

# Function to plot residuals from a fitted model
# Parameters:
# - fitted_model::Output: The model output object containing residuals
# - type::String="q": The type of residuals to plot ("q" for quantile, "std" for standardized, "cs" for conditional score)
# Returns:
# - A plot of the specified residuals
function plot_residuals(fitted_model::Output; type::String="q")

resid = get_residuals(fitted_model; type=type)

if type == "q"
name = "Quantile"
elseif type == "std"
name = "Standardized"
else
name = "Conditional Score"
end

if type == "cs"
num_params = length(fitted_model.fitted_params)
plots_cs = []
for n in 1:num_params
push!(plots_cs, plot(resid[:,n], label = "param $n",
title = "param $n", title_loc = :center))
end

if num_params == 2
plot(plots_cs[1], plots_cs[2], layout=(2,1), suptitle = "$name Residuals", size=(800,600))
else
plot(plots_cs[1], plots_cs[2], plots_cs[3], layout=(3,1), suptitle = "$name Residuals", size=(800,600))
end

else
plot(resid, title = "$name Residuals", label = "")
end
end

# Function to calculate the autocorrelation function (ACF) of residuals
# Parameters:
# - fitted_model::Output: The model output object containing residuals
# - lags::Int=25: The number of lags to include in the ACF calculation
# - type::String="q": The type of residuals to use ("q" for quantile, "std" for standardized)
# - squared::Bool=false: Whether to square the residuals before calculating ACF
# Returns:
# - A vector of ACF values
function get_acf_residuals(fitted_model::Output; lags::Int=25, type::String="q", squared::Bool=false)

resid = get_residuals(fitted_model; type=type)

squared == true ? resid = resid.^2 : nothing

return StatsBase.autocor(resid, 0:lags)
end

# Function to plot the autocorrelation function (ACF) of residuals
# Parameters:
# - output::Output: The model output object containing residuals
# - lags::Int=25: The number of lags to include in the ACF plot
# - type::String="q": The type of residuals to plot ("q" for quantile, "std" for standardized)
# - squared::Bool=false: Whether to square the residuals before calculating ACF
# Returns:
# - A plot of the ACF of the specified residuals
function plot_acf_residuals(output::Output; lags::Int=25, type::String="q", squared::Bool=false)
acf_values = get_acf_residuals(output; lags = lags, type = type, squared = squared)
resid = get_residuals(output; type=type)

if type == "q"
name = "Quantile"
elseif type == "std"
name = "Standardized"
end

squared ? is_squared = "Squared" : is_squared = ""

lag_values = collect(0:lags)
conf_interval = 1.96 / sqrt(length(resid))

plot(title="ACF $name $is_squared Residuals")
plot!(lag_values, acf_values, seriestype=:stem, label="", xticks=(lag_values, lag_values))
hline!([conf_interval, -conf_interval], line = (:red, :dash), label = "CI 95%")
end

# Function to plot a histogram of residuals
# Parameters:
# - output::Output: The model output object containing residuals
# - type::String="q": The type of residuals to plot ("q" for quantile, "std" for standardized)
# - bins::Int=20: The number of bins to use in the histogram
# Returns:
# - A histogram plot of the specified residuals
function plot_histogram(output::Output; type::String="q", bins::Int=20)
resid = get_residuals(output; type=type)

if type == "q"
name = "Quantile"
elseif type == "std"
name = "Standardized"
end

histogram(resid, title="Histogram $name Residuals", label = "", bins = bins)
end

# Function to plot a QQ plot of residuals
# Parameters:
# - output::Output: The model output object containing residuals
# - type::String="q": The type of residuals to plot ("q" for quantile, "std" for standardized)
# Returns:
# - A QQ plot of the specified residuals
function plot_qqplot(output::Output; type::String="q")
resid = get_residuals(output; type=type)

if type == "q"
name = "Quantile"
elseif type == "std"
name = "Standardized"
end

plot(qqplot(Normal, resid), title = "QQPlot $name Residuals")
end

# Function to perform the Jarque-Bera test for normality on residuals
# Parameters:
# - output::Output: The model output object containing residuals
# - type::String="q": The type of residuals to test ("q" for quantile, "std" for standardized)
# Returns:
# - A dictionary with the Jarque-Bera test statistic, p-value, skewness, and kurtosis
function jarquebera(output::Output; type::String="q")
resid = get_residuals(output; type=type)
jb = JarqueBeraTest(resid)
return Dict("stat" => jb.JB, "pvalue" => pvalue(jb), "skew" => jb.skew, "kurt" => jb.kurt)
end

# Function to perform the Ljung-Box test for autocorrelation on residuals
# Parameters:
# - output::Output: The model output object containing residuals
# - type::String="q": The type of residuals to test ("q" for quantile, "std" for standardized)
# - squared::Bool=false: Whether to square the residuals before testing
# - lags::Int=25: The number of lags to include in the test
# Returns:
# - A dictionary with the Ljung-Box test statistic and p-value
function ljungbox(output::Output; type::String="q", squared::Bool=false, lags::Int = 25)
resid = get_residuals(output; type=type)

squared ? resid = resid.^2 : nothing

lb = LjungBoxTest(resid, lags)
return Dict("stat" => lb.Q, "pvalue" => pvalue(lb))
end

# Function to perform the H test for variance on residuals
# Parameters:
# - output::Output: The model output object containing residuals
# - type::String="q": The type of residuals to test ("q" for quantile, "std" for standardized)
# Returns:
# - A dictionary with the F-statistic and p-value of the H test
function Htest(output::Output; type::String="q")
resid = get_residuals(output; type=type)

T = length(resid)
h = Int64(floor(T/3))

res1 = resid[1:h]
res3 = resid[2*h+1:end]

σ2_1 = var(res1; corrected=true)
σ2_3 = var(res3; corrected=true)

df1 = length(res1) - 1
df3 = length(res3) - 1

F_statistic = (σ2_3 / σ2_1)

p_value = 2* minimum([ccdf(FDist(df3, df1), F_statistic), cdf(FDist(df3, df1), F_statistic)])

return Dict("stat" => F_statistic, "pvalue" => p_value)
end

# Function to perform the ARCH test for autoregressive conditional heteroskedasticity on residuals
# Parameters:
# - output::Output: The model output object containing residuals
# - type::String="q": The type of residuals to test ("q" for quantile, "std" for standardized)
# - lags::Int=25: The number of lags to include in the test
# Returns:
# - A dictionary with the ARCH test statistic and p-value
function archtest(output::Output; type::String="q", lags::Int=25)
resid = get_residuals(output; type=type)
arch = ARCHLMTest(resid, lags)
return Dict("stat" => arch.LM, "pvalue" => pvalue(arch))
end

# Function to get p-values of multiple residuals diagnostics tests
# Parameters:
# - output::Output: The model output object containing residuals
# - lags::Int=25: The number of lags to include in the tests
# - type::String="q": The type of residuals to test ("q" for quantile, "std" for standardized)
# Returns:
# - A dictionary with p-values from various residuals diagnostics tests
function get_residuals_diagnosis_pvalues(output::Output; lags::Int=25, type::String="q")
jb = jarquebera(output; type = type)
lb = ljungbox(output; type = type, squared = false, lags = lags)
lb2 = ljungbox(output; type = type, squared = true, lags = lags)
H = Htest(output; type = type)
arch = archtest(output; type = type, lags = lags)

return Dict("JarqueBera" => jb["pvalue"], "HVariance" => H["pvalue"],
"LjungBox" => lb["pvalue"], "LjungBoxSquared" => lb2["pvalue"],
"ARCH" => arch["pvalue"])
end
17 changes: 17 additions & 0 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -515,3 +515,20 @@ function get_seasonality_dict_and_stochastic(seasonality::Vector{String})

return seasonality_dict, any(stochastic), stochastic
end

function get_in_sample_statistics(output::Output, y::Vector{Fl}) where Fl

y_pred = output.fit_in_sample
aic = output.information_criteria["aic"]
bic = output.information_criteria["bic"]
aicc = output.information_criteria["aicc"]

mse = mean((y .- y_pred).^2)
rmse = sqrt(mse)
mae = mean(abs.(y .- y_pred))
mape = mean(abs.((y .- y_pred) ./ y)) * 100

return Dict("MSE" => mse, "RMSE" => rmse,
"MAE" => mae, "MAPE" => mape,
"BIC" => bic, "AIC" => aic, "AICc" => aicc)
end
3 changes: 2 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Import all necessary packages
using UnobservedComponentsGAS
using Test, Random, Statistics, JuMP, Ipopt
using Test, Random, Statistics, JuMP, Ipopt, Plots
using JSON3, CSV, DataFrames, StateSpaceModels


Expand All @@ -12,5 +12,6 @@ include("test_components_dynamics.jl")
include("test_optimization.jl")
include("test_distributions.jl")
include("test_initialization.jl")
include("test_residuals_diagnostics.jl")


Loading

0 comments on commit 51798c0

Please sign in to comment.