I am doing some time series forecasting analysis with the fable
and fabletools
package and I am interested in comparing the accuracy of individual models and also a mixed model (consisting of the individual models I am using).
Here is some example code with a mock dataframe:-
library(fable)
library(fabletools)
library(distributional)
library(tidyverse)
library(imputeTS)
#creating mock dataframe
set.seed(1)
Date<-seq(as.Date("2018-01-01"), as.Date("2021-03-19"), by = "1 day")
Count<-rnorm(length(Date),mean = 2086, sd= 728)
Count<-round(Count)
df<-data.frame(Date,Count)
df
#===================redoing with new model================
df$Count<-abs(df$Count)#in case there is any negative values, force them to be absolute
count_data<-as_tsibble(df)
count_data<-imputeTS::na.mean(count_data)
testfrac<-count_data%>%arrange(Date)%>%sample_frac(0.8)
lastdate<-last(testfrac$Date)
#train data
train <- count_data %>%
#sample_frac(0.8)
filter(Date<=as.Date(lastdate))
set.seed(1)
fit <- train %>%
model(
ets = ETS(Count),
arima = ARIMA(Count),
snaive = SNAIVE(Count),
croston= CROSTON(Count),
ave=MEAN(Count),
naive=NAIVE(Count),
neural=NNETAR(Count),
lm=TSLM(Count ~ trend()+season())
) %>%
mutate(mixed = (ets + arima + snaive + croston + ave + naive + neural + lm) /8)# creates a combined model using the averages of all individual models
fc <- fit %>% forecast(h = 7)
accuracy(fc,count_data)
fc_accuracy <- accuracy(fc, count_data,
measures = list(
point_accuracy_measures,
interval_accuracy_measures,
distribution_accuracy_measures
)
)
fc_accuracy
# A tibble: 9 x 13
# .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1 winkler percentile CRPS
# <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 arima Test -191. 983. 744. -38.1 51.8 0.939 0.967 -0.308 5769. 567. 561.
#2 ave Test -191. 983. 744. -38.1 51.8 0.939 0.967 -0.308 5765. 566. 561.
#3 croston Test -191. 983. 745. -38.2 51.9 0.940 0.968 -0.308 29788. 745. 745.
#4 ets Test -189. 983. 743. -38.0 51.7 0.938 0.967 -0.308 5759. 566. 560.
#5 lm Test -154. 1017. 742. -36.5 51.1 0.937 1.00 -0.307 6417. 583. 577.
#6 mixed Test -173. 997. 747. -36.8 51.1 0.944 0.981 -0.328 29897. 747. 747.
#7 naive Test 99.9 970. 612. -19.0 38.7 0.772 0.954 -0.308 7856. 692. 685.
#8 neural Test -322. 1139. 934. -49.6 66.3 1.18 1.12 -0.404 26361. 852. 848.
#9 snaive Test -244 1192. 896. -37.1 55.5 1.13 1.17 -0.244 4663. 690. 683.
I demonstrate how to create a mixed model. However, there can be some individual models which hamper the performance of a mixed model when added to it; in other words, the mixed model could be potentially improved if it did not include the individual models which skews the accuracy in a detrimental way.
Desired outcome
What I would like to achieve is to be able to test all of the possible combinations of individual models and returns the mixed model with the most optimum performance on one of the accuracy metrics, for instance, Mean Absolute Error (MAE). But I am not sure how to do this in an automated way as there are many potential combinations.
Can someone suggest or share some code as to how I could do this?
A couple of things to consider:
0.75 * ets + 0.25 * arima
. The possibilities are now literally endless, so you start to see the limitations of the brute-force method (NB I don't think fable
actually supports these kind of combinations yet though).That, said, here's one approach you could use to generate all the possible combinations. Note that this might take a prohibitively long time to run - but should give you what you're after.
# Get a table of models to get combinations from
fit <- train %>%
model(
ets = ETS(Count),
arima = ARIMA(Count),
snaive = SNAIVE(Count),
croston= CROSTON(Count),
ave=MEAN(Count),
naive=NAIVE(Count),
neural=NNETAR(Count),
lm=TSLM(Count ~ trend()+season())
)
# Start with a vector containing all the models we want to combine
models <- c("ets", "arima", "snaive", "croston", "ave", "naive", "neural", "lm")
# Generate a table of combinations - if a value is 1, that indicates that
# the model should be included in the combinations
combinations <- models %>%
purrr::set_names() %>%
map(~0:1) %>%
tidyr::crossing(!!!.)
combinations
#> # A tibble: 256 x 8
#> ets arima snaive croston ave naive neural lm
#> <int> <int> <int> <int> <int> <int> <int> <int>
#> 1 0 0 0 0 0 0 0 0
#> 2 0 0 0 0 0 0 0 1
#> 3 0 0 0 0 0 0 1 0
#> 4 0 0 0 0 0 0 1 1
#> 5 0 0 0 0 0 1 0 0
#> 6 0 0 0 0 0 1 0 1
#> 7 0 0 0 0 0 1 1 0
#> 8 0 0 0 0 0 1 1 1
#> 9 0 0 0 0 1 0 0 0
#> 10 0 0 0 0 1 0 0 1
#> # ... with 246 more rows
# This just filters for combinations with at least 2 models
relevant_combinations <- combinations %>%
filter(rowSums(across()) > 1)
# We can use this table to generate the code we would put in a call to `mutate()`
# to generate the combination. {fable} does something funny with the ,
# meaning that more elegant approaches are more trouble
specs <- relevant_combinations %>%
mutate(id = row_number()) %>%
pivot_longer(-id, names_to = "model", values_to = "flag_present") %>%
filter(flag_present == 1) %>%
group_by(id) %>%
summarise(
desc = glue::glue_collapse(model, "_"),
model = glue::glue(
"({model_sums}) / {n_models}",
model_sums = glue::glue_collapse(model, " + "),
n_models = n()
)
) %>%
select(-id) %>%
pivot_wider(names_from = desc, values_from = model)
# This is what the `specs` table looks like:
specs
#> # A tibble: 1 x 247
#> neural_lm naive_lm naive_neural naive_neural_lm ave_lm ave_neural
#> <glue> <glue> <glue> <glue> <glue> <glue>
#> 1 (neural + lm) / 2 (naive +~ (naive + neu~ (naive + neural ~ (ave +~ (ave + ne~
#> # ... with 241 more variables: ave_neural_lm <glue>, ave_naive <glue>,
#> # ave_naive_lm <glue>, ave_naive_neural <glue>, ave_naive_neural_lm <glue>,
#> # croston_lm <glue>, croston_neural <glue>, croston_neural_lm <glue>,
#> # croston_naive <glue>, croston_naive_lm <glue>, croston_naive_neural <glue>,
#> # croston_naive_neural_lm <glue>, croston_ave <glue>, croston_ave_lm <glue>,
#> # croston_ave_neural <glue>, croston_ave_neural_lm <glue>,
#> # croston_ave_naive <glue>, croston_ave_naive_lm <glue>, ...
# We can combine our two tables and evaluate the generated code to produce
# combination models as follows:
combinations <- fit %>%
bind_cols(rename_with(specs, ~paste0("spec_", .))) %>%
mutate(across(starts_with("spec"), ~eval(parse(text = .))))
# Compute the accuracy for 2 random combinations to demonstrate:
combinations %>%
select(sample(seq_len(ncol(.)), 2)) %>%
forecast(h = 7) %>%
accuracy(count_data, measures = list(
point_accuracy_measures,
interval_accuracy_measures,
distribution_accuracy_measures
))
#> # A tibble: 2 x 13
#> .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1 winkler
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 spec_ets_arima~ Test -209. 1014. 771. -40.1 54.0 0.973 0.998 -0.327 30825.
#> 2 spec_ets_snaiv~ Test -145. 983. 726. -34.5 48.9 0.917 0.967 -0.316 29052.
#> # ... with 2 more variables: percentile <dbl>, CRPS <dbl>
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.