Models2

Author

Prof. Eric A. Suess

Published

February 13, 2023

Model Google’s stock price.

library(pacman)
p_load(tidyverse, fpp3)

Re-index based on trading days

google_stock <- gafa_stock %>%
  filter(Symbol == "GOOG", year(Date) >= 2015) %>%
  mutate(day = row_number()) %>%
  update_tsibble(index = day, regular = TRUE)

google_stock
# A tsibble: 1,006 x 9 [1]
# Key:       Symbol [1]
   Symbol Date        Open  High   Low Close Adj_Close  Volume   day
   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>   <dbl> <int>
 1 GOOG   2015-01-02  526.  528.  521.  522.      522. 1447600     1
 2 GOOG   2015-01-05  520.  521.  510.  511.      511. 2059800     2
 3 GOOG   2015-01-06  512.  513.  498.  499.      499. 2899900     3
 4 GOOG   2015-01-07  504.  504.  497.  498.      498. 2065100     4
 5 GOOG   2015-01-08  495.  501.  488.  500.      500. 3353600     5
 6 GOOG   2015-01-09  502.  502.  492.  493.      493. 2069400     6
 7 GOOG   2015-01-12  492.  493.  485.  490.      490. 2322400     7
 8 GOOG   2015-01-13  496.  500.  490.  493.      493. 2370500     8
 9 GOOG   2015-01-14  492.  500.  490.  498.      498. 2235700     9
10 GOOG   2015-01-15  503.  503.  495.  499.      499. 2715800    10
# … with 996 more rows

Filter the year of interest

google_2015 <- google_stock %>% filter(year(Date) == 2015)
google_2015 
# A tsibble: 252 x 9 [1]
# Key:       Symbol [1]
   Symbol Date        Open  High   Low Close Adj_Close  Volume   day
   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>   <dbl> <int>
 1 GOOG   2015-01-02  526.  528.  521.  522.      522. 1447600     1
 2 GOOG   2015-01-05  520.  521.  510.  511.      511. 2059800     2
 3 GOOG   2015-01-06  512.  513.  498.  499.      499. 2899900     3
 4 GOOG   2015-01-07  504.  504.  497.  498.      498. 2065100     4
 5 GOOG   2015-01-08  495.  501.  488.  500.      500. 3353600     5
 6 GOOG   2015-01-09  502.  502.  492.  493.      493. 2069400     6
 7 GOOG   2015-01-12  492.  493.  485.  490.      490. 2322400     7
 8 GOOG   2015-01-13  496.  500.  490.  493.      493. 2370500     8
 9 GOOG   2015-01-14  492.  500.  490.  498.      498. 2235700     9
10 GOOG   2015-01-15  503.  503.  495.  499.      499. 2715800    10
# … with 242 more rows

Fit the models

google_fit <- google_2015 %>%
  model(
    Mean = MEAN(Close),
    `Naïve` = NAIVE(Close),
    Drift = NAIVE(Close ~ drift())
  )
google_fit 
# A mable: 1 x 4
# Key:     Symbol [1]
  Symbol    Mean   Naïve         Drift
  <chr>  <model> <model>       <model>
1 GOOG    <MEAN> <NAIVE> <RW w/ drift>

Produce forecasts for the trading days in January 2016

google_jan_2016 <- google_stock %>%
  filter(yearmonth(Date) == yearmonth("2016 Jan"))

google_fc <- google_fit %>% 
  forecast(new_data = google_jan_2016)
google_fc
# A fable: 57 x 11 [1]
# Key:     Symbol, .model [3]
   Symbol .model   day        Close .mean Date        Open  High   Low Adj_Close
   <chr>  <chr>  <int>       <dist> <dbl> <date>     <dbl> <dbl> <dbl>     <dbl>
 1 GOOG   Mean     253 N(602, 6766)  602. 2016-01-04  743   744.  731.      742.
 2 GOOG   Mean     254 N(602, 6766)  602. 2016-01-05  746.  752   739.      743.
 3 GOOG   Mean     255 N(602, 6766)  602. 2016-01-06  730   747.  729.      744.
 4 GOOG   Mean     256 N(602, 6766)  602. 2016-01-07  730.  738.  719.      726.
 5 GOOG   Mean     257 N(602, 6766)  602. 2016-01-08  731.  733.  713       714.
 6 GOOG   Mean     258 N(602, 6766)  602. 2016-01-11  717.  719.  704.      716.
 7 GOOG   Mean     259 N(602, 6766)  602. 2016-01-12  722.  729.  717.      726.
 8 GOOG   Mean     260 N(602, 6766)  602. 2016-01-13  731.  735.  699.      701.
 9 GOOG   Mean     261 N(602, 6766)  602. 2016-01-14  705.  722.  689.      715.
10 GOOG   Mean     262 N(602, 6766)  602. 2016-01-15  692.  707.  685.      694.
# … with 47 more rows, and 1 more variable: Volume <dbl>

Plot the forecasts

google_fc %>%
  autoplot(google_2015, level = NULL) +
  autolayer(google_jan_2016, Close, color = "black") +
  labs(x = "Day", y = "Closing Price (US$)",
       title = "Google stock prices (Jan 2015 - Jan 2016)") +
  guides(colour = guide_legend(title = "Forecast"))

augment(google_fit)
# A tsibble: 756 x 7 [1]
# Key:       Symbol, .model [3]
   Symbol .model   day Close .fitted .resid .innov
   <chr>  <chr>  <int> <dbl>   <dbl>  <dbl>  <dbl>
 1 GOOG   Mean       1  522.    602.  -79.6  -79.6
 2 GOOG   Mean       2  511.    602.  -90.5  -90.5
 3 GOOG   Mean       3  499.    602. -102.  -102. 
 4 GOOG   Mean       4  498.    602. -103.  -103. 
 5 GOOG   Mean       5  500.    602. -102.  -102. 
 6 GOOG   Mean       6  493.    602. -108.  -108. 
 7 GOOG   Mean       7  490.    602. -112.  -112. 
 8 GOOG   Mean       8  493.    602. -108.  -108. 
 9 GOOG   Mean       9  498.    602. -103.  -103. 
10 GOOG   Mean      10  499.    602. -103.  -103. 
# … with 746 more rows

Residual diagnostics

A good forecasting method will yield innovation residuals with the following properties:

  1. The innovation residuals are uncorrelated. If there are correlations between innovation residuals, then there is information left in the residuals which should be used in computing forecasts.
  2. The innovation residuals have zero mean. If they have a mean other than zero, then the forecasts are biased.
  3. The innovation residuals have constant variance.
  4. The innovation residuals are normally distributed.
autoplot(google_2015, Close) +
  labs(x = "Day", y = "Closing Price (US$)",
       title = "Google Stock in 2015")

Fit the Naive model (jus t the mean) and augment the dataset with the residuals and innovations.

aug <- google_2015 %>%
  model(NAIVE(Close)) %>%
  augment()

autoplot(aug, .innov) +
  labs(x = "Day", y = "Residual",
       title = "Residuals from naïve method")
Warning: Removed 1 row(s) containing missing values (geom_path).

Normal?

aug %>%
  ggplot(aes(x = .innov)) +
  geom_histogram() +
  labs(title = "Histogram of residuals")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 1 rows containing non-finite values (stat_bin).

aug %>%
  ACF(.innov) %>%
  autoplot() +
  labs(title = "ACF of residuals")

google_2015 %>%
  model(NAIVE(Close)) %>%
  gg_tsresiduals()
Warning: Removed 1 row(s) containing missing values (geom_path).
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing non-finite values (stat_bin).

Portmanteau test

From a French word describing a suitcase or coat rack carrying several items of clothing.

Test if there is at least one lagged autocorrelation is different from zero.

Box-Pierce

aug %>% features(.innov, box_pierce, lag = 10)
# A tibble: 1 × 4
  Symbol .model       bp_stat bp_pvalue
  <chr>  <chr>          <dbl>     <dbl>
1 GOOG   NAIVE(Close)    7.74     0.654

Ljung-Box

aug %>% features(.innov, ljung_box, lag = 10)
# A tibble: 1 × 4
  Symbol .model       lb_stat lb_pvalue
  <chr>  <chr>          <dbl>     <dbl>
1 GOOG   NAIVE(Close)    7.91     0.637