Forecasting Models

Models

library(pacman)
p_load(tidyverse, fpp3)

global_economy

## # A tsibble: 15,150 x 9 [1Y]
## # Key:       Country [263]
##    Country     Code   Year         GDP Growth   CPI Imports Exports Population
##    <fct>       <fct> <dbl>       <dbl>  <dbl> <dbl>   <dbl>   <dbl>      <dbl>
##  1 Afghanistan AFG    1960  537777811.     NA    NA    7.02    4.13    8996351
##  2 Afghanistan AFG    1961  548888896.     NA    NA    8.10    4.45    9166764
##  3 Afghanistan AFG    1962  546666678.     NA    NA    9.35    4.88    9345868
##  4 Afghanistan AFG    1963  751111191.     NA    NA   16.9     9.17    9533954
##  5 Afghanistan AFG    1964  800000044.     NA    NA   18.1     8.89    9731361
##  6 Afghanistan AFG    1965 1006666638.     NA    NA   21.4    11.3     9938414
##  7 Afghanistan AFG    1966 1399999967.     NA    NA   18.6     8.57   10152331
##  8 Afghanistan AFG    1967 1673333418.     NA    NA   14.2     6.77   10372630
##  9 Afghanistan AFG    1968 1373333367.     NA    NA   15.2     8.90   10604346
## 10 Afghanistan AFG    1969 1408888922.     NA    NA   15.0    10.1    10854428
## # … with 15,140 more rows

global_economy %>% filter(Country == "Italy") %>% 
  autoplot()

## Plot variable not specified, automatically selected `.vars = GDP`

gdppc_Italy <- global_economy %>%
  filter(Country == "Italy") %>% 
  mutate(GDP_per_capita = GDP / Population) 

gdppc_Italy %>% autoplot(GDP_per_capita)

Train the model (estimate)

gdppc <- global_economy %>%
  mutate(GDP_per_capita = GDP / Population) 

fit <- gdppc %>%
  model(trend_model = TSLM(GDP_per_capita ~ trend()))

## Warning: 7 errors (1 unique) encountered for trend_model
## [7] 0 (non-NA) cases

fit

## # A mable: 263 x 2
## # Key:     Country [263]
##    Country             trend_model
##    <fct>                   <model>
##  1 Afghanistan              <TSLM>
##  2 Albania                  <TSLM>
##  3 Algeria                  <TSLM>
##  4 American Samoa           <TSLM>
##  5 Andorra                  <TSLM>
##  6 Angola                   <TSLM>
##  7 Antigua and Barbuda      <TSLM>
##  8 Arab World               <TSLM>
##  9 Argentina                <TSLM>
## 10 Armenia                  <TSLM>
## # … with 253 more rows

Forecasts

fit %>% forecast(h = "3 years")

## # A fable: 789 x 5 [1Y]
## # Key:     Country, .model [263]
##    Country        .model       Year   GDP_per_capita  .mean
##    <fct>          <chr>       <dbl>           <dist>  <dbl>
##  1 Afghanistan    trend_model  2018     N(526, 9653)   526.
##  2 Afghanistan    trend_model  2019     N(534, 9689)   534.
##  3 Afghanistan    trend_model  2020     N(542, 9727)   542.
##  4 Albania        trend_model  2018  N(4716, 476419)  4716.
##  5 Albania        trend_model  2019  N(4867, 481086)  4867.
##  6 Albania        trend_model  2020  N(5018, 486012)  5018.
##  7 Algeria        trend_model  2018  N(4410, 643094)  4410.
##  8 Algeria        trend_model  2019  N(4489, 645311)  4489.
##  9 Algeria        trend_model  2020  N(4568, 647602)  4568.
## 10 American Samoa trend_model  2018 N(12491, 652926) 12491.
## # … with 779 more rows

fit %>%
  forecast(h = "3 years") %>%
  filter(Country == "Italy") %>%
  autoplot(gdppc) +
  labs(y = "$US", title = "GDP per capita for Sweden")

Simple forecasting methods

aus_production %>% filter_index("1970 Q1" ~ "2004 Q4")

## # A tsibble: 140 x 7 [1Q]
##    Quarter  Beer Tobacco Bricks Cement Electricity   Gas
##      <qtr> <dbl>   <dbl>  <dbl>  <dbl>       <dbl> <dbl>
##  1 1970 Q1   387    6807    386   1049       12328    12
##  2 1970 Q2   357    7612    428   1134       14493    18
##  3 1970 Q3   374    7862    434   1229       15664    23
##  4 1970 Q4   466    7126    417   1188       13781    20
##  5 1971 Q1   410    7255    385   1058       13299    19
##  6 1971 Q2   370    8076    433   1209       15230    23
##  7 1971 Q3   379    8405    453   1199       16667    28
##  8 1971 Q4   487    7974    436   1253       14484    24
##  9 1972 Q1   419    6500    399   1070       13838    24
## 10 1972 Q2   378    7119    461   1282       15919    34
## # … with 130 more rows

# Set training data from 1992 to 2006
train <- aus_production %>% 
  filter_index("1992 Q1" ~ "2006 Q4")
# Fit the models
beer_fit <- train %>%
  model(
    Mean = MEAN(Beer),
    `Naïve` = NAIVE(Beer),
    `Seasonal naïve` = SNAIVE(Beer)
  )
# Generate forecasts for 14 quarters
beer_fc <- beer_fit %>% forecast(h = 14)
# Plot forecasts against actual values
beer_fc %>%
  autoplot(train, level = NULL) +
  autolayer(
    filter_index(aus_production, "2007 Q1" ~ .), 
    color = "black"
  ) +
  labs(
    y = "Megalitres", 
    title = "Forecasts for quarterly beer production"
  ) +
  guides(colour = guide_legend(title = "Forecast"))

## Plot variable not specified, automatically selected `.vars = Beer`

Residuals and Fitted Values

aug <- augment(beer_fit)
aug

## # A tsibble: 180 x 6 [1Q]
## # Key:       .model [3]
##    .model Quarter  Beer .fitted .resid .innov
##    <chr>    <qtr> <dbl>   <dbl>  <dbl>  <dbl>
##  1 Mean   1992 Q1   443    436.   6.55   6.55
##  2 Mean   1992 Q2   410    436. -26.4  -26.4 
##  3 Mean   1992 Q3   420    436. -16.4  -16.4 
##  4 Mean   1992 Q4   532    436.  95.6   95.6 
##  5 Mean   1993 Q1   433    436.  -3.45  -3.45
##  6 Mean   1993 Q2   421    436. -15.4  -15.4 
##  7 Mean   1993 Q3   410    436. -26.4  -26.4 
##  8 Mean   1993 Q4   512    436.  75.6   75.6 
##  9 Mean   1994 Q1   449    436.  12.6   12.6 
## 10 Mean   1994 Q2   381    436. -55.4  -55.4 
## # … with 170 more rows

autoplot(aug, .innov) +
  labs(x = "Day", y = "Residual", 
       title = "Residuals")

## Warning: Removed 5 row(s) containing missing values (geom_path).

aug %>%
  ggplot(aes(x = .innov)) +
  geom_histogram() +
  labs(title = "Histogram of residuals")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning: Removed 5 rows containing non-finite values (stat_bin).

aug %>%
  ACF(.innov) %>%
  autoplot() +
  labs(title = "ACF of residuals")

aug %>% features(.innov, box_pierce, lag = 10, dof = 0)

## # A tibble: 3 x 3
##   .model         bp_stat bp_pvalue
##   <chr>            <dbl>     <dbl>
## 1 Mean             157.   0       
## 2 Naïve            133.   0       
## 3 Seasonal naïve    31.3  0.000515

aug %>% features(.innov, ljung_box, lag = 10, dof = 0)

## # A tibble: 3 x 3
##   .model         lb_stat lb_pvalue
##   <chr>            <dbl>     <dbl>
## 1 Mean             180.   0       
## 2 Naïve            153.   0       
## 3 Seasonal naïve    35.1  0.000122

Forecasting Models

Eric A. Suess

2/17/2021

Models