library(gamlss)
# Read and prepare a weekly time series
logconstant <- 1
Y <- read.csv("data/insurance_weekly.csv") %>%
dplyr::select(Claims, Precipitation) %>%
mutate(Precipitation_lag1 = dplyr::lag(Precipitation, 1),
Week = 1:n(),
Year = rep(2002:2011, each = 52),
Claims_ln = log(Claims + logconstant)) %>%
mutate(Claims_ln_lag1 = dplyr::lag(Claims_ln, 1))
n <- nrow(Y)
# Set the forecast horizon (4 weeks ahead)
h_target <- 4
# Set the initial training size (the first 9 years)
n_train_initial <- 9 * 52
# Set confidence level for prediction intervals
C <- 0.95
lower_p <- (1 - C) / 2
upper_p <- 1 - lower_p
# Initialize storage for performance metrics
e_naive_h <- e_arima_h <- e_gamlss_xp_h <- e_gamlss_xa_h <- matrix(NA, nrow = 0, ncol = h_target)
c_naive_h <- c_arima_h <- c_gamlss_xp_h <- c_gamlss_xa_h <- matrix(NA, nrow = 0, ncol = h_target)
w_naive_h <- w_arima_h <- w_gamlss_xp_h <- w_gamlss_xa_h <- matrix(NA, nrow = 0, ncol = h_target)
# Loop over time points with seq(); can change the argument "by" to skip some points
for (i in seq(n_train_initial, n - h_target, by = 1)) {
# 1. Training and testing sets
Y_train <- Y[1:i, ]
Y_train_noNA <- na.omit(Y_train)
y_train <- ts(Y_train$Claims, frequency = 52)
Y_test <- Y[(i + 1):(i + h_target), ]
y_test <- Y_test$Claims
# 2. Fit models
m_arima <- forecast::auto.arima(y_train)
m_gamlss <- gamlss(Claims ~ ps(Precipitation) + Week + ps(Precipitation_lag1) + Claims_ln_lag1
,sigma.formula = ~ps(Week)
,family = NBI
,control = gamlss.control(c.crit = 0.01, trace = FALSE)
,data = Y_train_noNA)
# 3. Forecasts
# Seasonal naive forecast ("climatology" for the number of claims)
fc_naive <- forecast::snaive(y_train, level = C * 100, h = h_target)
# ARIMA forecast
fc_arima <- forecast::forecast(m_arima, h = h_target, level = C * 100)
# GAMLSS ex-post forecast, see ?gamlss::predict.gamlss
pred_mu <- predict(m_gamlss,
newdata = Y_test,
what = "mu",
type = "response")
pred_sigma <- predict(m_gamlss,
newdata = Y_test,
what = "sigma",
type = "response")
lower_bound <- qNBI(lower_p, mu = pred_mu, sigma = pred_sigma)
upper_bound <- qNBI(upper_p, mu = pred_mu, sigma = pred_sigma)
fc_gamlss_xp <- data.frame(mean = pred_mu,
lower = lower_bound,
upper = upper_bound)
# GAMLSS ex-ante forecast
# Simple precipitation forecast: use naive seasonal forecast
last_precip <- tail(Y_train$Precipitation, 1)
new_precip <- forecast::snaive(ts(Y_train$Precipitation, frequency = 52),
h = h_target)$mean %>%
as.vector()
Y_test_exante <- Y_test %>%
mutate(Precipitation = new_precip) %>%
mutate(Precipitation_lag1 = dplyr::lag(Precipitation, 1, default = last_precip))
# Since we have the lagged claims as one of the predictors (unknown when forecasting),
# we need to do recursive forecasts for GAMLSS ex-ante
lower_bound_xa <- upper_bound_xa <- pred_mu_xa <- rep(NA, h_target)
for (j in 1:h_target) {
pred_mu_1 <- predict(m_gamlss,
newdata = Y_test_exante[j, , drop = FALSE],
what = "mu",
type = "response")
pred_sigma_1 <- predict(m_gamlss,
newdata = Y_test_exante[j, , drop = FALSE],
what = "sigma",
type = "response")
pred_mu_xa[j] <- pred_mu_1
lower_bound_xa[j] <- qNBI(lower_p, mu = pred_mu_1, sigma = pred_sigma_1)
upper_bound_xa[j] <- qNBI(upper_p, mu = pred_mu_1, sigma = pred_sigma_1)
# Update lagged claims for next step
if (j < h_target) {
Y_test_exante$Claims_ln_lag1[j + 1] <- log(pred_mu_1 + logconstant)
}
}
fc_gamlss_xa <- data.frame(mean = pred_mu_xa,
lower = lower_bound_xa,
upper = upper_bound_xa)
# 4.1 Forecast errors
e_naive_h <- rbind(e_naive_h, y_test - fc_naive$mean)
e_arima_h <- rbind(e_arima_h, y_test - fc_arima$mean)
e_gamlss_xp_h <- rbind(e_gamlss_xp_h, y_test - fc_gamlss_xp$mean)
e_gamlss_xa_h <- rbind(e_gamlss_xa_h, y_test - fc_gamlss_xa$mean)
# 4.2 Forecast coverage
c_naive_h <- rbind(c_naive_h,
(y_test >= as.vector(fc_naive$lower)) &
(y_test <= as.vector(fc_naive$upper)))
c_arima_h <- rbind(c_arima_h,
(y_test >= as.vector(fc_arima$lower)) &
(y_test <= as.vector(fc_arima$upper)))
c_gamlss_xp_h <- rbind(c_gamlss_xp_h,
(y_test >= fc_gamlss_xp$lower) & (y_test <= fc_gamlss_xp$upper))
c_gamlss_xa_h <- rbind(c_gamlss_xa_h,
(y_test >= fc_gamlss_xa$lower) & (y_test <= fc_gamlss_xa$upper))
# 4.3 Forecast interval width
w_naive_h <- rbind(w_naive_h, as.vector(fc_naive$upper) - as.vector(fc_naive$lower))
w_arima_h <- rbind(w_arima_h, as.vector(fc_arima$upper) - as.vector(fc_arima$lower))
w_gamlss_xp_h <- rbind(w_gamlss_xp_h, fc_gamlss_xp$upper - fc_gamlss_xp$lower)
w_gamlss_xa_h <- rbind(w_gamlss_xa_h, fc_gamlss_xa$upper - fc_gamlss_xa$lower)
}