###
Statistics 6871 - Time Series
### Lab 4
### Introduction to writing script files in splus and fitting ARIMA
models.
### Note that Splus is case sensitive!
###########################################################################################
# Lake Huron: Recall the Lake Huron data which relates to the level
of the lake 1875-1972.
# Load the data into a rts (regular time series) object in splus
called Lake. This
# is done using the rts and scan commands. The tspar command gives
some details about the
# rts object Lake.
Lake <-
rts(scan("I:\\Courswrk\\Stat\\esuess\\Stat6871\\Data_bd\\Lake.txt"),start=1975,frequency=12,units="months")
tspar(Lake)
# Plot the time series Lake using the ts.plot command. The par
command is first given
# so we can have more than one plot on a page.
par(mfcol=c(3,2))
ts.plot(Lake, main="Level of Lake Huron")
# Next reload the data without the months attached, this way the
time series can be plotted
# against the index in time.
Lake <-
rts(scan("I:\\Courswrk\\Stat\\esuess\\Stat6871\\Data_bd\\Lake.txt"))
tspar(Lake)
ts.plot(Lake, main="Level of Lake Huron")
# The Lake Huron data has a negative trend. Earlier we detrended
the data byusing a linear
# fit and plotting the residuals. To fit a simple linear regression
model to the data we
# use the lm (linear model) command. Note that in the parenthesis
the model is given with
# the ~ inducating an equal sign.
y <- Lake
x <- 1:length(Lake)
fit <- lm(y ~ x)
fit # This will show the output of the fit.
# Plot the line on the same picture as the data.
ts.plot(Lake, main="Level of Lake Huron", ylim=c(6,12)) #
ylim specifies the limits on the y-axis.
par(new=T) # This renews the plot so another plot can be plotted on
top of the previous plot.
y.hat <- rts(fit$fitted)
ts.plot(y.hat, ylim=c(6,12))
# Define the new object Lake.detrend. This new object gets the
residuals of the lm fit.
Lake.detrend <- rts(fit$resid)
# Plot the residuals. Note that the residuals appear to be
stationary.
ts.plot(Lake.detrend, main="Detrended Level of Lake
Huron")
# Now plot the ACF and PACF to get an idea of the second-order
correlation in the data.
acf(Lake.detrend)
acf(Lake.detrend, type="partial")
# What does the one negative spike in the PACF imply about a
possible ARIMA model?
# We will fit some AR models to the data using the ar
(autoregressive model) command.
# We will give the option order.max = 5 which will tell splus to
fit the first five
# AR models to the data.
Lake.detrend.ar <- ar(Lake.detrend, order.max=5)
Lake.detrend.ar # This will give all of the output produced by the
ar command.
# Some of the sub-objects in the output are:
Lake.detrend.ar$order # Order of the best fitting model according
to the AIC.
# The Akaike Information Criteria AIC is a goodness of fit criteria
that rewards reducing
# the squared error and penalizes for additional parameters. The
formula is:
# AIC(K) = log( hat{sigma}^2 ) + 2*K/n where K is the order of the
model fit.
# The best fitting model has the minimum AIC.
Lake.detrend.ar$ar # The coefficients.
Lake.detrend.ar$aic # The AIC values for the models fit.
Lake.detrend.ar$resid # The residuals.
# What is the best fitting model? What is its order? What are the
coefficients?
# What is the AIC of the model?
# Next we examine the residuals. Plot the residuals. First we give
the par command again.
Lake.detrend.ar.resid <- Lake.detrend.ar$resid
par(mfcol=c(3,2))
ts.plot(Lake.detrend.ar.resid, main="Residuals of the ar
fit")
# Plot the acf of the residuals. Do they seem to be white noise?
acf(Lake.detrend.ar.resid)
# Plot a histogram of the residuals. Do they seem normal?
hist(Lake.detrend.ar.resid)
###########################################################################################
###########################################################################################
# Some Simulated data examples.
# Simulate simple first order moving average models of differing
lengths n = 10, 100, and 1000.
# What do you notice about the confidence intervals on the acf's?
# Note: Recall that in splus the MA coefficients are negative of
the coefficients from the book.
x.1 <- rts(arima.sim(list(order=c(0,0,1), ma = -0.5), n=10))
x.2 <- rts(arima.sim(list(order=c(0,0,1), ma = -0.5), n=100))
x.3 <- rts(arima.sim(list(order=c(0,0,1), ma = -0.5), n=1000))
par(mfrow=c(2,3))
ts.plot(x.1,main="x.1")
ts.plot(x.2,main="x.2")
ts.plot(x.3,main="x.3")
acf(x.1)
acf(x.2)
acf(x.3)
# What is noticeable about the ACF of x.3?
# Next we will fit a MA 1 model the simulated data x.3 using the
arima.mle command.
x.3.ma1 <- arima.mle(x.3, model=list(order=c(0,0,1)), n.cond =
2)
x.3.ma1$model
x.3.ma1$aic
# What happens if we fit a MA 2 model to the series? What happens
to the AIC?
x.3.ma2 <- arima.mle(x.3, model=list(order=c(0,0,2)), n.cond =
2)
x.3.ma2$model
x.3.ma2$aic
# Diagnostics for the arima fit can be calculated using the
arima.diag command.
par(mfrow=c(1,1))
x.3.ma1.diag <- arima.diag(x.3.ma1)
x.3.ma1.diag$gof # This give the values of the portmantau test for
the values of the lags.
###########################################################################################
###########################################################################################
# Sunspots data. In the file Wsunsposts.txt there is the date and
sun spot measurements.
DateSunSpots <-
matrix(scan("I:\\Courswrk\\Stat\\esuess\\Stat6871\\Data_e\\Wsunspots.txt"),
ncol=2, byrow=T)
SunSpots <- rts(DateSunSpots[,2], start=1749, frequency=12,
units="years")
ts.plot(SunSpots, main="Wolfer's Sunspot Data")
# Create the mean corrected data. X_t = D_t - 46.93
SunSpots <- SunSpots - 46.93
# Plot the ACF and PACF of the Sunspot data. What do you see in the
PACF?
par(mfrow=c(2,2))
acf(SunSpots)
acf(SunSpots, type="partial")
SunSpots.ar1 <- ar(SunSpots, order=2)
SunSpots.ar1$ar