Today we are going to take a look at a number of the time series datasets (tsibbles) used in the fpp3 book that show seasonal patterns.

A seasonal pattern is one that is exhibited over and over again at regular intervals.

library(fpp3)
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.3 ──
## ✓ tibble      3.0.5       ✓ tsibble     0.9.3  
## ✓ dplyr       1.0.3       ✓ tsibbledata 0.2.0  
## ✓ tidyr       1.1.2       ✓ feasts      0.1.6  
## ✓ lubridate   1.7.9.2     ✓ fable       0.2.1  
## ✓ ggplot2     3.3.3
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## x lubridate::date()   masks base::date()
## x dplyr::filter()     masks stats::filter()
## x tsibble::interval() masks lubridate::interval()
## x dplyr::lag()        masks stats::lag()

In the book, the author used the following code to create the a10 tsibble.

Most people consider the code here to be “sort of bad” code. Why?

Answer: The assignment to a new object in R is usually done at the start of a pipeline of code, not at the end. It is easy to miss the assignment at the end.

# Build the a10 series from PBS: keep the rows with ATC2 == "A10", sum Cost
# into TotalC, and divide that total by one million to get Cost.
# NOTE: the result is stored with a right-assignment (`->`) on the last line,
# which is easy to overlook when scanning the code.
PBS %>%
  filter(ATC2 == "A10") %>%
  select(Month, Concession, Type, Cost) %>%
  summarise(TotalC = sum(Cost)) %>%
  mutate(Cost = TotalC / 1e6) -> a10

Beware of this in the book. It may be hard to find the code you are looking for because of this. Better practice:

# Build a10 from the PBS rows with ATC2 == "A10": TotalC is the summed Cost
# and Cost is that total divided by one million. The assignment is at the
# start of the pipeline so the object being created is visible immediately.
a10 <- PBS %>%
  filter(ATC2 == "A10") %>%
  select(Month, Concession, Type, Cost) %>%
  summarise(TotalC = sum(Cost)) %>%
  mutate(Cost = TotalC / 1e6)    # 1e6 = 1000000
# Print the tsibble to inspect the result.
a10
## # A tsibble: 204 x 3 [1M]
##       Month  TotalC  Cost
##       <mth>   <dbl> <dbl>
##  1 1991 Jul 3526591  3.53
##  2 1991 Aug 3180891  3.18
##  3 1991 Sep 3252221  3.25
##  4 1991 Oct 3611003  3.61
##  5 1991 Nov 3565869  3.57
##  6 1991 Dec 4306371  4.31
##  7 1992 Jan 5088335  5.09
##  8 1992 Feb 2814520  2.81
##  9 1992 Mar 2985811  2.99
## 10 1992 Apr 3204780  3.20
## # … with 194 more rows
# No variable is supplied, so autoplot() chooses one itself (TotalC here).
a10 %>% autoplot()
## Plot variable not specified, automatically selected `.vars = TotalC`

a10 %>% gg_season()  # no variable given: the first measured variable (TotalC) is plotted
## Plot variable not specified, automatically selected `y = TotalC`
## Warning in NextMethod("["): Incompatible methods (">=.Date", ">=.vctrs_vctr")
## for ">="
## Warning in NextMethod("["): Incompatible methods ("<=.Date", "<=.vctrs_vctr")
## for "<="
## Warning in NextMethod("["): Incompatible methods (">=.Date", ">=.vctrs_vctr")
## for ">="
## Warning in NextMethod("["): Incompatible methods ("<=.Date", "<=.vctrs_vctr")
## for "<="

# Seasonal plot of Cost (in $ million); labels = "both" is passed to
# gg_season().
gg_season(a10, Cost, labels = "both") +
  labs(
    title = "Seasonal plot: antidiabetic drug sales",
    y = "$ million"
  )
## Warning in NextMethod("["): Incompatible methods (">=.Date", ">=.vctrs_vctr")
## for ">="
## Warning in NextMethod("["): Incompatible methods ("<=.Date", "<=.vctrs_vctr")
## for "<="
## Warning in NextMethod("["): Incompatible methods (">=.Date", ">=.vctrs_vctr")
## for ">="
## Warning in NextMethod("["): Incompatible methods ("<=.Date", "<=.vctrs_vctr")
## for "<="

a10 %>% gg_subseries()  # no variable given: the first measured variable (TotalC) is plotted
## Plot variable not specified, automatically selected `y = TotalC`

# Seasonal subseries plot of Cost (in $ million).
gg_subseries(a10, Cost) +
  labs(
    title = "Seasonal subseries plot: antidiabetic drug sales",
    y = "$ million"
  )

Notice that there is an upward trend in the data. So the year is an important predictor of the TotalC.

Also notice the effect of the trend on the ACF.

# Combined display for TotalC; it includes the ACF discussed in the text.
a10 %>% gg_tsdisplay(TotalC)
## Warning in NextMethod("["): Incompatible methods (">=.Date", ">=.vctrs_vctr")
## for ">="
## Warning in NextMethod("["): Incompatible methods ("<=.Date", "<=.vctrs_vctr")
## for "<="
## Warning in NextMethod("["): Incompatible methods (">=.Date", ">=.vctrs_vctr")
## for ">="
## Warning in NextMethod("["): Incompatible methods ("<=.Date", "<=.vctrs_vctr")
## for "<="

Tourism

Filter the data to only look at trips where the purpose was a Holiday.

# Keep only the Holiday trips and total them within each State; the result
# has one Trips value per State and Quarter.
holidays <- tourism %>%
  filter(Purpose == "Holiday") %>%
  group_by(State) %>%
  summarise(Trips = sum(Trips))

# Print the tsibble to inspect the result.
holidays
## # A tsibble: 640 x 3 [1Q]
## # Key:       State [8]
##    State Quarter Trips
##    <chr>   <qtr> <dbl>
##  1 ACT   1998 Q1  196.
##  2 ACT   1998 Q2  127.
##  3 ACT   1998 Q3  111.
##  4 ACT   1998 Q4  170.
##  5 ACT   1999 Q1  108.
##  6 ACT   1999 Q2  125.
##  7 ACT   1999 Q3  178.
##  8 ACT   1999 Q4  218.
##  9 ACT   2000 Q1  158.
## 10 ACT   2000 Q2  155.
## # … with 630 more rows
# Time plot of holiday trips, one line per State.
# The plotted variable is Trips, so the title refers to trips (not nights) to
# agree with the y-axis label.
holidays %>% autoplot(Trips) +
  labs(y = "thousands of trips",
       title = "Australian domestic holiday trips")

Compare two of the time series.

Victoria is very well-behaved quarterly data.

# Combined display for Victoria's holiday trips; no variable is supplied, so
# Trips is selected automatically.
holidays %>%
  filter(State == "Victoria") %>%
  gg_tsdisplay()
## Plot variable not specified, automatically selected `y = Trips`
## Warning in NextMethod("["): Incompatible methods (">=.Date", ">=.vctrs_vctr")
## for ">="
## Warning in NextMethod("["): Incompatible methods ("<=.Date", "<=.vctrs_vctr")
## for "<="
## Warning in NextMethod("["): Incompatible methods (">=.Date", ">=.vctrs_vctr")
## for ">="
## Warning in NextMethod("["): Incompatible methods ("<=.Date", "<=.vctrs_vctr")
## for "<="

ACT is not so well behaved.

# Combined display for the ACT's holiday trips; no variable is supplied, so
# Trips is selected automatically.
holidays %>%
  filter(State == "ACT") %>%
  gg_tsdisplay()
## Plot variable not specified, automatically selected `y = Trips`
## Warning in NextMethod("["): Incompatible methods (">=.Date", ">=.vctrs_vctr")
## for ">="
## Warning in NextMethod("["): Incompatible methods ("<=.Date", "<=.vctrs_vctr")
## for "<="
## Warning in NextMethod("["): Incompatible methods (">=.Date", ">=.vctrs_vctr")
## for ">="
## Warning in NextMethod("["): Incompatible methods ("<=.Date", "<=.vctrs_vctr")
## for "<="

Scatterplots

Now, summing over all of the different Purposes within the States.

# Total Trips within each State, with no filter on Purpose, so every Purpose
# is included; the result has one Trips value per State and Quarter.
visitors <- tourism %>%
  group_by(State) %>%
  summarise(Trips = sum(Trips))

# Print the tsibble to inspect the result.
visitors 
## # A tsibble: 640 x 3 [1Q]
## # Key:       State [8]
##    State Quarter Trips
##    <chr>   <qtr> <dbl>
##  1 ACT   1998 Q1  551.
##  2 ACT   1998 Q2  416.
##  3 ACT   1998 Q3  436.
##  4 ACT   1998 Q4  450.
##  5 ACT   1999 Q1  379.
##  6 ACT   1999 Q2  558.
##  7 ACT   1999 Q3  449.
##  8 ACT   1999 Q4  595.
##  9 ACT   2000 Q1  600.
## 10 ACT   2000 Q2  557.
## # … with 630 more rows

Consider the cross-correlation between the different time series from different States.

# Draw each State's quarterly Trips series in its own panel so the series can
# be compared; scales = "free_y" lets every panel use its own y-axis range.
visitors %>%
  ggplot(aes(x = Quarter, y = Trips)) +
  geom_line() +
  facet_grid(vars(State), scales = "free_y") +
  # Trips counts trips in thousands (the unit used on the holidays plot),
  # not visitor nights in millions.
  labs(y = "Number of trips each quarter (thousands)")

The ggpairs() function assumes the different time series are in separate columns. Note that this changes a tidy tsibble into a non-tidy tsibble.

# Spread the States into separate columns (one series per column), then draw
# the pairwise plots for columns 2 to 9, i.e. the eight State columns.
visitors %>%
  pivot_wider(names_from = State, values_from = Trips) %>%
  GGally::ggpairs(columns = 2:9)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

Now consider the autocorrelation within a single time series.

Can you see the autocorrelation every 4 Quarters in the Victoria time series?

# Lag plot for Victoria: widen to one column per State, keep only the
# Victoria column, and draw the lag plot with points.
visitors %>%
  pivot_wider(names_from = State, values_from = Trips) %>%
  select(Victoria) %>%
  gg_lag(geom = "point")
## Plot variable not specified, automatically selected `y = Victoria`

# ACF plot for Victoria: widen to one column per State, keep only the
# Victoria column, compute its ACF and plot it.
visitors %>%
  pivot_wider(names_from = State, values_from = Trips) %>%
  select(Victoria) %>%
  ACF() %>%
  autoplot()
## Response variable not specified, automatically selected `var = Victoria`

Can you see that the seasonal pattern is not so clear in the ACT time series?

# Lag plot for the ACT: widen to one column per State, keep only the ACT
# column, and draw the lag plot with points.
visitors %>%
  pivot_wider(names_from = State, values_from = Trips) %>%
  select(ACT) %>%
  gg_lag(geom = "point")
## Plot variable not specified, automatically selected `y = ACT`

# ACF plot for the ACT: widen to one column per State, keep only the ACT
# column, compute its ACF and plot it.
visitors %>%
  pivot_wider(names_from = State, values_from = Trips) %>%
  select(ACT) %>%
  ACF() %>%
  autoplot()
## Response variable not specified, automatically selected `var = ACT`