library(pacman)
p_load(tidyverse, fpp3)
Models2
Model Google’s stock price.
Re-index based on trading days
# Keep only the GOOG rows from 2015 onward, then index the series by a
# consecutive row counter (`day`) instead of the calendar date, declaring the
# new index as regular.
google_stock <- gafa_stock |>
  filter(Symbol == "GOOG", year(Date) >= 2015) |>
  mutate(day = row_number()) |>
  update_tsibble(index = day, regular = TRUE)
# Print the re-indexed tsibble.
google_stock
Symbol <chr> | Date <date> | Open <dbl> | High <dbl> | Low <dbl> | Close <dbl> | Adj_Close <dbl> | Volume <dbl> | day <int> |
---|---|---|---|---|---|---|---|---|
GOOG | 2015-01-02 | 526.1147 | 528.3624 | 521.2316 | 521.9377 | 521.9377 | 1447600 | 1 |
GOOG | 2015-01-05 | 520.3962 | 521.4604 | 510.2520 | 511.0576 | 511.0576 | 2059800 | 2 |
GOOG | 2015-01-06 | 512.1815 | 513.3500 | 498.3078 | 499.2128 | 499.2128 | 2899900 | 3 |
GOOG | 2015-01-07 | 504.2252 | 504.4679 | 496.9154 | 498.3575 | 498.3575 | 2065100 | 4 |
GOOG | 2015-01-08 | 495.2645 | 500.7245 | 488.3128 | 499.9289 | 499.9289 | 3353600 | 5 |
GOOG | 2015-01-09 | 501.9975 | 502.1566 | 492.0821 | 493.4545 | 493.4545 | 2069400 | 6 |
GOOG | 2015-01-12 | 492.2312 | 493.2616 | 484.8916 | 489.8543 | 489.8543 | 2322400 | 7 |
GOOG | 2015-01-13 | 496.1099 | 500.2272 | 489.6952 | 493.4644 | 493.4644 | 2370500 | 8 |
GOOG | 2015-01-14 | 491.9428 | 500.4759 | 490.3018 | 498.1288 | 498.1288 | 2235700 | 9 |
GOOG | 2015-01-15 | 502.8030 | 502.9124 | 495.0358 | 499.0437 | 499.0437 | 2715800 | 10 |
Filter the year of interest
# Restrict the re-indexed series to observations dated in 2015.
google_2015 <- google_stock |>
  filter(year(Date) == 2015)
# Print the 2015 subset.
google_2015
Symbol <chr> | Date <date> | Open <dbl> | High <dbl> | Low <dbl> | Close <dbl> | Adj_Close <dbl> | Volume <dbl> | day <int> |
---|---|---|---|---|---|---|---|---|
GOOG | 2015-01-02 | 526.1147 | 528.3624 | 521.2316 | 521.9377 | 521.9377 | 1447600 | 1 |
GOOG | 2015-01-05 | 520.3962 | 521.4604 | 510.2520 | 511.0576 | 511.0576 | 2059800 | 2 |
GOOG | 2015-01-06 | 512.1815 | 513.3500 | 498.3078 | 499.2128 | 499.2128 | 2899900 | 3 |
GOOG | 2015-01-07 | 504.2252 | 504.4679 | 496.9154 | 498.3575 | 498.3575 | 2065100 | 4 |
GOOG | 2015-01-08 | 495.2645 | 500.7245 | 488.3128 | 499.9289 | 499.9289 | 3353600 | 5 |
GOOG | 2015-01-09 | 501.9975 | 502.1566 | 492.0821 | 493.4545 | 493.4545 | 2069400 | 6 |
GOOG | 2015-01-12 | 492.2312 | 493.2616 | 484.8916 | 489.8543 | 489.8543 | 2322400 | 7 |
GOOG | 2015-01-13 | 496.1099 | 500.2272 | 489.6952 | 493.4644 | 493.4644 | 2370500 | 8 |
GOOG | 2015-01-14 | 491.9428 | 500.4759 | 490.3018 | 498.1288 | 498.1288 | 2235700 | 9 |
GOOG | 2015-01-15 | 502.8030 | 502.9124 | 495.0358 | 499.0437 | 499.0437 | 2715800 | 10 |
Fit the models
# Fit three benchmark models to the 2015 closing price; each named argument
# becomes one model column in the resulting model table.
google_fit <- google_2015 |>
  model(
    Mean = MEAN(Close),
    `Naïve` = NAIVE(Close),
    Drift = NAIVE(Close ~ drift())
  )
# Print the table of fitted models.
google_fit
Symbol <chr> | Mean <lst_mdl> | Naïve <lst_mdl> | Drift <lst_mdl> | |
---|---|---|---|---|
GOOG | <lst_mdl> | <lst_mdl> | <lst_mdl> |
Produce forecasts for the trading days in January 2016
# Rows of the re-indexed series whose date falls in January 2016; these
# supply the `day` index values to forecast for.
google_jan_2016 <- google_stock |>
  filter(yearmonth(Date) == yearmonth("2016 Jan"))
# Forecast every fitted model over the January 2016 rows.
google_fc <- google_fit |>
  forecast(new_data = google_jan_2016)
# Print the forecast table.
google_fc
Symbol <chr> | .model <chr> | day <int> | Close <dist> | .mean <dbl> | Date <date> | Open <dbl> | High <dbl> | Low <dbl> | Adj_Close <dbl> | |
---|---|---|---|---|---|---|---|---|---|---|
GOOG | Mean | 253 | <dist> | 601.5505 | 2016-01-04 | 743.00 | 744.060 | 731.258 | 741.84 | |
GOOG | Mean | 254 | <dist> | 601.5505 | 2016-01-05 | 746.45 | 752.000 | 738.640 | 742.58 | |
GOOG | Mean | 255 | <dist> | 601.5505 | 2016-01-06 | 730.00 | 747.180 | 728.920 | 743.62 | |
GOOG | Mean | 256 | <dist> | 601.5505 | 2016-01-07 | 730.31 | 738.500 | 719.060 | 726.39 | |
GOOG | Mean | 257 | <dist> | 601.5505 | 2016-01-08 | 731.45 | 733.230 | 713.000 | 714.47 | |
GOOG | Mean | 258 | <dist> | 601.5505 | 2016-01-11 | 716.61 | 718.855 | 703.540 | 716.03 | |
GOOG | Mean | 259 | <dist> | 601.5505 | 2016-01-12 | 721.68 | 728.750 | 717.317 | 726.07 | |
GOOG | Mean | 260 | <dist> | 601.5505 | 2016-01-13 | 730.85 | 734.740 | 698.610 | 700.56 | |
GOOG | Mean | 261 | <dist> | 601.5505 | 2016-01-14 | 705.38 | 721.925 | 689.100 | 714.72 | |
GOOG | Mean | 262 | <dist> | 601.5505 | 2016-01-15 | 692.29 | 706.740 | 685.370 | 694.45 |
Plot the forecasts
# Plot the forecasts on top of the 2015 history (level = NULL omits the
# prediction intervals), then overlay the actual January 2016 closing prices
# in black.
google_fc |>
  autoplot(google_2015, level = NULL) +
  autolayer(google_jan_2016, Close, color = "black") +
  labs(
    x = "Day", y = "Closing Price (US$)",
    title = "Google stock prices (Jan 2015 - Jan 2016)"
  ) +
  guides(colour = guide_legend(title = "Forecast"))
# Tabulate fitted values and residuals for every fitted model.
google_fit |>
  augment()
Symbol <chr> | .model <chr> | day <int> | Close <dbl> | .fitted <dbl> | .resid <dbl> | .innov <dbl> |
---|---|---|---|---|---|---|
GOOG | Mean | 1 | 521.9377 | 601.5505 | -7.961280e+01 | -7.961280e+01 |
GOOG | Mean | 2 | 511.0576 | 601.5505 | -9.049293e+01 | -9.049293e+01 |
GOOG | Mean | 3 | 499.2128 | 601.5505 | -1.023377e+02 | -1.023377e+02 |
GOOG | Mean | 4 | 498.3575 | 601.5505 | -1.031930e+02 | -1.031930e+02 |
GOOG | Mean | 5 | 499.9289 | 601.5505 | -1.016217e+02 | -1.016217e+02 |
GOOG | Mean | 6 | 493.4545 | 601.5505 | -1.080960e+02 | -1.080960e+02 |
GOOG | Mean | 7 | 489.8543 | 601.5505 | -1.116962e+02 | -1.116962e+02 |
GOOG | Mean | 8 | 493.4644 | 601.5505 | -1.080861e+02 | -1.080861e+02 |
GOOG | Mean | 9 | 498.1288 | 601.5505 | -1.034218e+02 | -1.034218e+02 |
GOOG | Mean | 10 | 499.0437 | 601.5505 | -1.025068e+02 | -1.025068e+02 |
Residual diagnostics
A good forecasting method will yield innovation residuals with the following properties:
- The innovation residuals are uncorrelated. If there are correlations between innovation residuals, then there is information left in the residuals which should be used in computing forecasts.
- The innovation residuals have zero mean. If they have a mean other than zero, then the forecasts are biased.
- The innovation residuals have constant variance.
- The innovation residuals are normally distributed.
# Time plot of the 2015 closing prices.
google_2015 |>
  autoplot(Close) +
  labs(
    title = "Google Stock in 2015",
    x = "Day",
    y = "Closing Price (US$)"
  )
Fit the naïve model (each forecast is just the previous observation, not the mean) and augment the dataset with the residuals and innovation residuals.
# Fit the naïve model to the 2015 closing price and attach its fitted
# values and residuals to the data.
aug <- google_2015 |>
  model(NAIVE(Close)) |>
  augment()
# Time plot of the innovation residuals from the naïve fit.
aug |>
  autoplot(.innov) +
  labs(
    title = "Residuals from naïve method",
    x = "Day",
    y = "Residual"
  )
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_line()`).
Normal?
# Histogram of the innovation residuals, used to eyeball normality.
aug |>
  ggplot(aes(x = .innov)) +
  geom_histogram() +
  labs(title = "Histogram of residuals")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 1 row containing non-finite outside the scale range
(`stat_bin()`).
# Autocorrelation function of the innovation residuals.
aug |>
  ACF(.innov) |>
  autoplot() +
  labs(title = "ACF of residuals")
# Refit the naïve model and draw the combined residual diagnostic display.
google_2015 |>
  model(NAIVE(Close)) |>
  gg_tsresiduals()
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_line()`).
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 1 row containing non-finite outside the scale range
(`stat_bin()`).
Portmanteau test
From a French word describing a suitcase or coat rack carrying several items of clothing.
Test whether at least one lagged autocorrelation is different from zero.
Box-Pierce
# Box-Pierce portmanteau test on the innovation residuals, using 10 lags.
aug |>
  features(.innov, box_pierce, lag = 10)
Symbol <chr> | .model <chr> | bp_stat <dbl> | bp_pvalue <dbl> | |
---|---|---|---|---|
GOOG | NAIVE(Close) | 7.744517 | 0.6537761 |
Ljung-Box
# Ljung-Box portmanteau test on the innovation residuals, using 10 lags.
aug |>
  features(.innov, ljung_box, lag = 10)
Symbol <chr> | .model <chr> | lb_stat <dbl> | lb_pvalue <dbl> | |
---|---|---|---|---|
GOOG | NAIVE(Close) | 7.914143 | 0.6372231 |