library(pacman)
p_load(tidyverse, fpp3)
Models2
Model Google’s stock price.
Re-index based on trading days
<- gafa_stock %>%
google_stock filter(Symbol == "GOOG", year(Date) >= 2015) %>%
mutate(day = row_number()) %>%
update_tsibble(index = day, regular = TRUE)
google_stock
# A tsibble: 1,006 x 9 [1]
# Key: Symbol [1]
Symbol Date Open High Low Close Adj_Close Volume day
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 GOOG 2015-01-02 526. 528. 521. 522. 522. 1447600 1
2 GOOG 2015-01-05 520. 521. 510. 511. 511. 2059800 2
3 GOOG 2015-01-06 512. 513. 498. 499. 499. 2899900 3
4 GOOG 2015-01-07 504. 504. 497. 498. 498. 2065100 4
5 GOOG 2015-01-08 495. 501. 488. 500. 500. 3353600 5
6 GOOG 2015-01-09 502. 502. 492. 493. 493. 2069400 6
7 GOOG 2015-01-12 492. 493. 485. 490. 490. 2322400 7
8 GOOG 2015-01-13 496. 500. 490. 493. 493. 2370500 8
9 GOOG 2015-01-14 492. 500. 490. 498. 498. 2235700 9
10 GOOG 2015-01-15 503. 503. 495. 499. 499. 2715800 10
# … with 996 more rows
Filter the year of interest
<- google_stock %>% filter(year(Date) == 2015)
google_2015 google_2015
# A tsibble: 252 x 9 [1]
# Key: Symbol [1]
Symbol Date Open High Low Close Adj_Close Volume day
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 GOOG 2015-01-02 526. 528. 521. 522. 522. 1447600 1
2 GOOG 2015-01-05 520. 521. 510. 511. 511. 2059800 2
3 GOOG 2015-01-06 512. 513. 498. 499. 499. 2899900 3
4 GOOG 2015-01-07 504. 504. 497. 498. 498. 2065100 4
5 GOOG 2015-01-08 495. 501. 488. 500. 500. 3353600 5
6 GOOG 2015-01-09 502. 502. 492. 493. 493. 2069400 6
7 GOOG 2015-01-12 492. 493. 485. 490. 490. 2322400 7
8 GOOG 2015-01-13 496. 500. 490. 493. 493. 2370500 8
9 GOOG 2015-01-14 492. 500. 490. 498. 498. 2235700 9
10 GOOG 2015-01-15 503. 503. 495. 499. 499. 2715800 10
# … with 242 more rows
Fit the models
<- google_2015 %>%
google_fit model(
Mean = MEAN(Close),
`Naïve` = NAIVE(Close),
Drift = NAIVE(Close ~ drift())
) google_fit
# A mable: 1 x 4
# Key: Symbol [1]
Symbol Mean Naïve Drift
<chr> <model> <model> <model>
1 GOOG <MEAN> <NAIVE> <RW w/ drift>
Produce forecasts for the trading days in January 2016
<- google_stock %>%
google_jan_2016 filter(yearmonth(Date) == yearmonth("2016 Jan"))
<- google_fit %>%
google_fc forecast(new_data = google_jan_2016)
google_fc
# A fable: 57 x 11 [1]
# Key: Symbol, .model [3]
Symbol .model day Close .mean Date Open High Low Adj_Close
<chr> <chr> <int> <dist> <dbl> <date> <dbl> <dbl> <dbl> <dbl>
1 GOOG Mean 253 N(602, 6766) 602. 2016-01-04 743 744. 731. 742.
2 GOOG Mean 254 N(602, 6766) 602. 2016-01-05 746. 752 739. 743.
3 GOOG Mean 255 N(602, 6766) 602. 2016-01-06 730 747. 729. 744.
4 GOOG Mean 256 N(602, 6766) 602. 2016-01-07 730. 738. 719. 726.
5 GOOG Mean 257 N(602, 6766) 602. 2016-01-08 731. 733. 713 714.
6 GOOG Mean 258 N(602, 6766) 602. 2016-01-11 717. 719. 704. 716.
7 GOOG Mean 259 N(602, 6766) 602. 2016-01-12 722. 729. 717. 726.
8 GOOG Mean 260 N(602, 6766) 602. 2016-01-13 731. 735. 699. 701.
9 GOOG Mean 261 N(602, 6766) 602. 2016-01-14 705. 722. 689. 715.
10 GOOG Mean 262 N(602, 6766) 602. 2016-01-15 692. 707. 685. 694.
# … with 47 more rows, and 1 more variable: Volume <dbl>
Plot the forecasts
%>%
google_fc autoplot(google_2015, level = NULL) +
autolayer(google_jan_2016, Close, color = "black") +
labs(x = "Day", y = "Closing Price (US$)",
title = "Google stock prices (Jan 2015 - Jan 2016)") +
guides(colour = guide_legend(title = "Forecast"))
augment(google_fit)
# A tsibble: 756 x 7 [1]
# Key: Symbol, .model [3]
Symbol .model day Close .fitted .resid .innov
<chr> <chr> <int> <dbl> <dbl> <dbl> <dbl>
1 GOOG Mean 1 522. 602. -79.6 -79.6
2 GOOG Mean 2 511. 602. -90.5 -90.5
3 GOOG Mean 3 499. 602. -102. -102.
4 GOOG Mean 4 498. 602. -103. -103.
5 GOOG Mean 5 500. 602. -102. -102.
6 GOOG Mean 6 493. 602. -108. -108.
7 GOOG Mean 7 490. 602. -112. -112.
8 GOOG Mean 8 493. 602. -108. -108.
9 GOOG Mean 9 498. 602. -103. -103.
10 GOOG Mean 10 499. 602. -103. -103.
# … with 746 more rows
Residual diagnostics
A good forecasting method will yield innovation residuals with the following properties:
- The innovation residuals are uncorrelated. If there are correlations between innovation residuals, then there is information left in the residuals which should be used in computing forecasts.
- The innovation residuals have zero mean. If they have a mean other than zero, then the forecasts are biased.
- The innovation residuals have constant variance.
- The innovation residuals are normally distributed.
autoplot(google_2015, Close) +
labs(x = "Day", y = "Closing Price (US$)",
title = "Google Stock in 2015")
Fit the Naive model (jus t the mean) and augment the dataset with the residuals and innovations.
<- google_2015 %>%
aug model(NAIVE(Close)) %>%
augment()
autoplot(aug, .innov) +
labs(x = "Day", y = "Residual",
title = "Residuals from naïve method")
Warning: Removed 1 row(s) containing missing values (geom_path).
Normal?
%>%
aug ggplot(aes(x = .innov)) +
geom_histogram() +
labs(title = "Histogram of residuals")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 1 rows containing non-finite values (stat_bin).
%>%
aug ACF(.innov) %>%
autoplot() +
labs(title = "ACF of residuals")
%>%
google_2015 model(NAIVE(Close)) %>%
gg_tsresiduals()
Warning: Removed 1 row(s) containing missing values (geom_path).
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 1 rows containing non-finite values (stat_bin).
Portmanteau test
From a French word describing a suitcase or coat rack carrying several items of clothing.
Test if there is at least one lagged autocorrelation is different from zero.
Box-Pierce
%>% features(.innov, box_pierce, lag = 10) aug
# A tibble: 1 × 4
Symbol .model bp_stat bp_pvalue
<chr> <chr> <dbl> <dbl>
1 GOOG NAIVE(Close) 7.74 0.654
Ljung-Box
%>% features(.innov, ljung_box, lag = 10) aug
# A tibble: 1 × 4
Symbol .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 GOOG NAIVE(Close) 7.91 0.637