Models2

Author

Prof. Eric A. Suess

Published

February 17, 2025

Model Google’s stock price.

library(pacman)
p_load(tidyverse, fpp3)

Re-index based on trading days

google_stock <- gafa_stock |>
  filter(Symbol == "GOOG", year(Date) >= 2015) |>
  mutate(day = row_number()) |>
  update_tsibble(index = day, regular = TRUE)

google_stock
ABCDEFGHIJ0123456789
Symbol
<chr>
Date
<date>
Open
<dbl>
High
<dbl>
Low
<dbl>
Close
<dbl>
Adj_Close
<dbl>
Volume
<dbl>
day
<int>
GOOG2015-01-02526.1147528.3624521.2316521.9377521.937714476001
GOOG2015-01-05520.3962521.4604510.2520511.0576511.057620598002
GOOG2015-01-06512.1815513.3500498.3078499.2128499.212828999003
GOOG2015-01-07504.2252504.4679496.9154498.3575498.357520651004
GOOG2015-01-08495.2645500.7245488.3128499.9289499.928933536005
GOOG2015-01-09501.9975502.1566492.0821493.4545493.454520694006
GOOG2015-01-12492.2312493.2616484.8916489.8543489.854323224007
GOOG2015-01-13496.1099500.2272489.6952493.4644493.464423705008
GOOG2015-01-14491.9428500.4759490.3018498.1288498.128822357009
GOOG2015-01-15502.8030502.9124495.0358499.0437499.0437271580010

Filter the year of interest

google_2015 <- google_stock |> filter(year(Date) == 2015)
google_2015 
ABCDEFGHIJ0123456789
Symbol
<chr>
Date
<date>
Open
<dbl>
High
<dbl>
Low
<dbl>
Close
<dbl>
Adj_Close
<dbl>
Volume
<dbl>
day
<int>
GOOG2015-01-02526.1147528.3624521.2316521.9377521.937714476001
GOOG2015-01-05520.3962521.4604510.2520511.0576511.057620598002
GOOG2015-01-06512.1815513.3500498.3078499.2128499.212828999003
GOOG2015-01-07504.2252504.4679496.9154498.3575498.357520651004
GOOG2015-01-08495.2645500.7245488.3128499.9289499.928933536005
GOOG2015-01-09501.9975502.1566492.0821493.4545493.454520694006
GOOG2015-01-12492.2312493.2616484.8916489.8543489.854323224007
GOOG2015-01-13496.1099500.2272489.6952493.4644493.464423705008
GOOG2015-01-14491.9428500.4759490.3018498.1288498.128822357009
GOOG2015-01-15502.8030502.9124495.0358499.0437499.0437271580010

Fit the models

google_fit <- google_2015 |>
  model(
    Mean = MEAN(Close),
    `Naïve` = NAIVE(Close),
    Drift = NAIVE(Close ~ drift())
  )
google_fit 
ABCDEFGHIJ0123456789
Symbol
<chr>
Mean
<lst_mdl>
Naïve
<lst_mdl>
Drift
<lst_mdl>
GOOG<lst_mdl><lst_mdl><lst_mdl>

Produce forecasts for the trading days in January 2016

google_jan_2016 <- google_stock |>
  filter(yearmonth(Date) == yearmonth("2016 Jan"))

google_fc <- google_fit |> 
  forecast(new_data = google_jan_2016)
google_fc
ABCDEFGHIJ0123456789
Symbol
<chr>
.model
<chr>
day
<int>
Close
<dist>
.mean
<dbl>
Date
<date>
Open
<dbl>
High
<dbl>
Low
<dbl>
Adj_Close
<dbl>
GOOGMean253<dist>601.55052016-01-04743.00744.060731.258741.84
GOOGMean254<dist>601.55052016-01-05746.45752.000738.640742.58
GOOGMean255<dist>601.55052016-01-06730.00747.180728.920743.62
GOOGMean256<dist>601.55052016-01-07730.31738.500719.060726.39
GOOGMean257<dist>601.55052016-01-08731.45733.230713.000714.47
GOOGMean258<dist>601.55052016-01-11716.61718.855703.540716.03
GOOGMean259<dist>601.55052016-01-12721.68728.750717.317726.07
GOOGMean260<dist>601.55052016-01-13730.85734.740698.610700.56
GOOGMean261<dist>601.55052016-01-14705.38721.925689.100714.72
GOOGMean262<dist>601.55052016-01-15692.29706.740685.370694.45

Plot the forecasts

google_fc |>
  autoplot(google_2015, level = NULL) +
  autolayer(google_jan_2016, Close, color = "black") +
  labs(x = "Day", y = "Closing Price (US$)",
       title = "Google stock prices (Jan 2015 - Jan 2016)") +
  guides(colour = guide_legend(title = "Forecast"))

augment(google_fit)
ABCDEFGHIJ0123456789
Symbol
<chr>
.model
<chr>
day
<int>
Close
<dbl>
.fitted
<dbl>
.resid
<dbl>
.innov
<dbl>
GOOGMean1521.9377601.5505-7.961280e+01-7.961280e+01
GOOGMean2511.0576601.5505-9.049293e+01-9.049293e+01
GOOGMean3499.2128601.5505-1.023377e+02-1.023377e+02
GOOGMean4498.3575601.5505-1.031930e+02-1.031930e+02
GOOGMean5499.9289601.5505-1.016217e+02-1.016217e+02
GOOGMean6493.4545601.5505-1.080960e+02-1.080960e+02
GOOGMean7489.8543601.5505-1.116962e+02-1.116962e+02
GOOGMean8493.4644601.5505-1.080861e+02-1.080861e+02
GOOGMean9498.1288601.5505-1.034218e+02-1.034218e+02
GOOGMean10499.0437601.5505-1.025068e+02-1.025068e+02

Residual diagnostics

A good forecasting method will yield innovation residuals with the following properties:

  1. The innovation residuals are uncorrelated. If there are correlations between innovation residuals, then there is information left in the residuals which should be used in computing forecasts.
  2. The innovation residuals have zero mean. If they have a mean other than zero, then the forecasts are biased.
  3. The innovation residuals have constant variance.
  4. The innovation residuals are normally distributed.
autoplot(google_2015, Close) +
  labs(x = "Day", y = "Closing Price (US$)",
       title = "Google Stock in 2015")

Fit the Naive model (jus t the mean) and augment the dataset with the residuals and innovations.

aug <- google_2015 |>
  model(NAIVE(Close)) |>
  augment()

autoplot(aug, .innov) +
  labs(x = "Day", y = "Residual",
       title = "Residuals from naïve method")
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_line()`).

Normal?

aug |>
  ggplot(aes(x = .innov)) +
  geom_histogram() +
  labs(title = "Histogram of residuals")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 1 row containing non-finite outside the scale range
(`stat_bin()`).

aug |>
  ACF(.innov) |>
  autoplot() +
  labs(title = "ACF of residuals")

google_2015 |>
  model(NAIVE(Close)) |>
  gg_tsresiduals()
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_line()`).
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 1 row containing non-finite outside the scale range
(`stat_bin()`).

Portmanteau test

From a French word describing a suitcase or coat rack carrying several items of clothing.

Test if there is at least one lagged autocorrelation is different from zero.

Box-Pierce

aug |> features(.innov, box_pierce, lag = 10)
ABCDEFGHIJ0123456789
Symbol
<chr>
.model
<chr>
bp_stat
<dbl>
bp_pvalue
<dbl>
GOOGNAIVE(Close)7.7445170.6537761

Ljung-Box

aug |> features(.innov, ljung_box, lag = 10)
ABCDEFGHIJ0123456789
Symbol
<chr>
.model
<chr>
lb_stat
<dbl>
lb_pvalue
<dbl>
GOOGNAIVE(Close)7.9141430.6372231