library(pacman)
p_load(tidyverse, fpp3)
Forecasting Models
Models
global_economy
# A tsibble: 15,150 x 9 [1Y]
# Key: Country [263]
Country Code Year GDP Growth CPI Imports Exports Population
<fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Afghanistan AFG 1960 537777811. NA NA 7.02 4.13 8996351
2 Afghanistan AFG 1961 548888896. NA NA 8.10 4.45 9166764
3 Afghanistan AFG 1962 546666678. NA NA 9.35 4.88 9345868
4 Afghanistan AFG 1963 751111191. NA NA 16.9 9.17 9533954
5 Afghanistan AFG 1964 800000044. NA NA 18.1 8.89 9731361
6 Afghanistan AFG 1965 1006666638. NA NA 21.4 11.3 9938414
7 Afghanistan AFG 1966 1399999967. NA NA 18.6 8.57 10152331
8 Afghanistan AFG 1967 1673333418. NA NA 14.2 6.77 10372630
9 Afghanistan AFG 1968 1373333367. NA NA 15.2 8.90 10604346
10 Afghanistan AFG 1969 1408888922. NA NA 15.0 10.1 10854428
# … with 15,140 more rows
%>% filter(Country == "Italy") %>%
global_economy autoplot()
Plot variable not specified, automatically selected `.vars = GDP`
<- global_economy %>%
gdppc_Italy filter(Country == "Italy") %>%
mutate(GDP_per_capita = GDP / Population)
%>% autoplot(GDP_per_capita) gdppc_Italy
Train the model (estimate)
<- global_economy %>%
gdppc mutate(GDP_per_capita = GDP / Population)
<- gdppc %>%
fit model(trend_model = TSLM(GDP_per_capita ~ trend()))
Warning: 7 errors (1 unique) encountered for trend_model
[7] 0 (non-NA) cases
fit
# A mable: 263 x 2
# Key: Country [263]
Country trend_model
<fct> <model>
1 Afghanistan <TSLM>
2 Albania <TSLM>
3 Algeria <TSLM>
4 American Samoa <TSLM>
5 Andorra <TSLM>
6 Angola <TSLM>
7 Antigua and Barbuda <TSLM>
8 Arab World <TSLM>
9 Argentina <TSLM>
10 Armenia <TSLM>
# … with 253 more rows
Forecasts
%>% forecast(h = "3 years") fit
# A fable: 789 x 5 [1Y]
# Key: Country, .model [263]
Country .model Year GDP_per_capita .mean
<fct> <chr> <dbl> <dist> <dbl>
1 Afghanistan trend_model 2018 N(526, 9653) 526.
2 Afghanistan trend_model 2019 N(534, 9689) 534.
3 Afghanistan trend_model 2020 N(542, 9727) 542.
4 Albania trend_model 2018 N(4716, 476419) 4716.
5 Albania trend_model 2019 N(4867, 481086) 4867.
6 Albania trend_model 2020 N(5018, 486012) 5018.
7 Algeria trend_model 2018 N(4410, 643094) 4410.
8 Algeria trend_model 2019 N(4489, 645311) 4489.
9 Algeria trend_model 2020 N(4568, 647602) 4568.
10 American Samoa trend_model 2018 N(12491, 652926) 12491.
# … with 779 more rows
%>%
fit forecast(h = "3 years") %>%
filter(Country == "Italy") %>%
autoplot(gdppc) +
labs(y = "$US", title = "GDP per capita for Sweden")
Simple forecasting methods
%>% filter_index("1970 Q1" ~ "2004 Q4") aus_production
# A tsibble: 140 x 7 [1Q]
Quarter Beer Tobacco Bricks Cement Electricity Gas
<qtr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1970 Q1 387 6807 386 1049 12328 12
2 1970 Q2 357 7612 428 1134 14493 18
3 1970 Q3 374 7862 434 1229 15664 23
4 1970 Q4 466 7126 417 1188 13781 20
5 1971 Q1 410 7255 385 1058 13299 19
6 1971 Q2 370 8076 433 1209 15230 23
7 1971 Q3 379 8405 453 1199 16667 28
8 1971 Q4 487 7974 436 1253 14484 24
9 1972 Q1 419 6500 399 1070 13838 24
10 1972 Q2 378 7119 461 1282 15919 34
# … with 130 more rows
# Set training data from 1992 to 2006
<- aus_production %>%
train filter_index("1992 Q1" ~ "2006 Q4")
# Fit the models
<- train %>%
beer_fit model(
Mean = MEAN(Beer),
`Naïve` = NAIVE(Beer),
`Seasonal naïve` = SNAIVE(Beer)
)# Generate forecasts for 14 quarters
<- beer_fit %>% forecast(h = 14)
beer_fc # Plot forecasts against actual values
%>%
beer_fc autoplot(train, level = NULL) +
autolayer(
filter_index(aus_production, "2007 Q1" ~ .),
color = "black"
+
) labs(
y = "Megalitres",
title = "Forecasts for quarterly beer production"
+
) guides(colour = guide_legend(title = "Forecast"))
Plot variable not specified, automatically selected `.vars = Beer`
Residuals and Fitted Values
<- augment(beer_fit)
aug aug
# A tsibble: 180 x 6 [1Q]
# Key: .model [3]
.model Quarter Beer .fitted .resid .innov
<chr> <qtr> <dbl> <dbl> <dbl> <dbl>
1 Mean 1992 Q1 443 436. 6.55 6.55
2 Mean 1992 Q2 410 436. -26.4 -26.4
3 Mean 1992 Q3 420 436. -16.4 -16.4
4 Mean 1992 Q4 532 436. 95.6 95.6
5 Mean 1993 Q1 433 436. -3.45 -3.45
6 Mean 1993 Q2 421 436. -15.4 -15.4
7 Mean 1993 Q3 410 436. -26.4 -26.4
8 Mean 1993 Q4 512 436. 75.6 75.6
9 Mean 1994 Q1 449 436. 12.6 12.6
10 Mean 1994 Q2 381 436. -55.4 -55.4
# … with 170 more rows
autoplot(aug, .innov) +
labs(x = "Day", y = "Residual",
title = "Residuals")
Warning: Removed 5 row(s) containing missing values (geom_path).
%>%
aug ggplot(aes(x = .innov)) +
geom_histogram() +
labs(title = "Histogram of residuals")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 5 rows containing non-finite values (stat_bin).
%>%
aug ACF(.innov) %>%
autoplot() +
labs(title = "ACF of residuals")
%>% features(.innov, box_pierce, lag = 10) aug
# A tibble: 3 × 3
.model bp_stat bp_pvalue
<chr> <dbl> <dbl>
1 Mean 157. 0
2 Naïve 133. 0
3 Seasonal naïve 31.3 0.000515
%>% features(.innov, ljung_box, lag = 10) aug
# A tibble: 3 × 3
.model lb_stat lb_pvalue
<chr> <dbl> <dbl>
1 Mean 180. 0
2 Naïve 153. 0
3 Seasonal naïve 35.1 0.000122