### Statistics 6871 - Time Series

### Lab 4

### Introduction to writing script files in splus and fitting ARIMA models.

### Note that Splus is case sensitive!

###########################################################################################

# Lake Huron: Recall the Lake Huron data which relates to the level of the lake 1875-1972.

# Load the data into a rts (regular time series) object in splus called Lake. This

# is done using the rts and scan commands. The tspar command gives some details about the

# rts object Lake.

Lake <- rts(scan("I:\\Courswrk\\Stat\\esuess\\Stat6871\\Data_bd\\Lake.txt"),start=1975,frequency=12,units="months")

tspar(Lake)

# Plot the time series Lake using the ts.plot command. The par command is first given

# so we can have more than one plot on a page.

par(mfcol=c(3,2))

ts.plot(Lake, main="Level of Lake Huron")

# Next reload the data without the months attached, this way the time series can be plotted

# against the index in time.

Lake <- rts(scan("I:\\Courswrk\\Stat\\esuess\\Stat6871\\Data_bd\\Lake.txt"))

tspar(Lake)

ts.plot(Lake, main="Level of Lake Huron")

# The Lake Huron data has a negative trend. Earlier we detrended the data byusing a linear

# fit and plotting the residuals. To fit a simple linear regression model to the data we

# use the lm (linear model) command. Note that in the parenthesis the model is given with

# the ~ inducating an equal sign.

y <- Lake

x <- 1:length(Lake)

fit <- lm(y ~ x)

fit # This will show the output of the fit.

# Plot the line on the same picture as the data.

ts.plot(Lake, main="Level of Lake Huron", ylim=c(6,12)) # ylim specifies the limits on the y-axis.

par(new=T) # This renews the plot so another plot can be plotted on top of the previous plot.

y.hat <- rts(fit$fitted)

ts.plot(y.hat, ylim=c(6,12))

# Define the new object Lake.detrend. This new object gets the residuals of the lm fit.

Lake.detrend <- rts(fit$resid)

# Plot the residuals. Note that the residuals appear to be stationary.

ts.plot(Lake.detrend, main="Detrended Level of Lake Huron")

# Now plot the ACF and PACF to get an idea of the second-order correlation in the data.

acf(Lake.detrend)

acf(Lake.detrend, type="partial")

# What does the one negative spike in the PACF imply about a possible ARIMA model?

# We will fit some AR models to the data using the ar (autoregressive model) command.

# We will give the option order.max = 5 which will tell splus to fit the first five

# AR models to the data.

Lake.detrend.ar <- ar(Lake.detrend, order.max=5)

Lake.detrend.ar # This will give all of the output produced by the ar command.

# Some of the sub-objects in the output are:

Lake.detrend.ar$order # Order of the best fitting model according to the AIC.

# The Akaike Information Criteria AIC is a goodness of fit criteria that rewards reducing

# the squared error and penalizes for additional parameters. The formula is:

# AIC(K) = log( hat{sigma}^2 ) + 2*K/n where K is the order of the model fit.

# The best fitting model has the minimum AIC.

Lake.detrend.ar$ar # The coefficients.

Lake.detrend.ar$aic # The AIC values for the models fit.

Lake.detrend.ar$resid # The residuals.

# What is the best fitting model? What is its order? What are the coefficients?

# What is the AIC of the model?

# Next we examine the residuals. Plot the residuals. First we give the par command again.

Lake.detrend.ar.resid <- Lake.detrend.ar$resid

par(mfcol=c(3,2))

ts.plot(Lake.detrend.ar.resid, main="Residuals of the ar fit")

# Plot the acf of the residuals. Do they seem to be white noise?

acf(Lake.detrend.ar.resid)

# Plot a histogram of the residuals. Do they seem normal?

hist(Lake.detrend.ar.resid)

###########################################################################################

###########################################################################################

# Some Simulated data examples.

# Simulate simple first order moving average models of differing lengths n = 10, 100, and 1000.

# What do you notice about the confidence intervals on the acf's?

# Note: Recall that in splus the MA coefficients are negative of the coefficients from the book.

x.1 <- rts(arima.sim(list(order=c(0,0,1), ma = -0.5), n=10))

x.2 <- rts(arima.sim(list(order=c(0,0,1), ma = -0.5), n=100))

x.3 <- rts(arima.sim(list(order=c(0,0,1), ma = -0.5), n=1000))

par(mfrow=c(2,3))

ts.plot(x.1,main="x.1")

ts.plot(x.2,main="x.2")

ts.plot(x.3,main="x.3")

acf(x.1)

acf(x.2)

acf(x.3)

# What is noticeable about the ACF of x.3?

# Next we will fit a MA 1 model the simulated data x.3 using the arima.mle command.

x.3.ma1 <- arima.mle(x.3, model=list(order=c(0,0,1)), n.cond = 2)

x.3.ma1$model

x.3.ma1$aic

# What happens if we fit a MA 2 model to the series? What happens to the AIC?

x.3.ma2 <- arima.mle(x.3, model=list(order=c(0,0,2)), n.cond = 2)

x.3.ma2$model

x.3.ma2$aic

# Diagnostics for the arima fit can be calculated using the arima.diag command.

par(mfrow=c(1,1))

x.3.ma1.diag <- arima.diag(x.3.ma1)

x.3.ma1.diag$gof # This give the values of the portmantau test for the values of the lags.

###########################################################################################

###########################################################################################

# Sunspots data. In the file Wsunsposts.txt there is the date and sun spot measurements.

DateSunSpots <- matrix(scan("I:\\Courswrk\\Stat\\esuess\\Stat6871\\Data_e\\Wsunspots.txt"), ncol=2, byrow=T)

SunSpots <- rts(DateSunSpots[,2], start=1749, frequency=12, units="years")

ts.plot(SunSpots, main="Wolfer's Sunspot Data")

# Create the mean corrected data. X_t = D_t - 46.93

SunSpots <- SunSpots - 46.93

# Plot the ACF and PACF of the Sunspot data. What do you see in the PACF?

par(mfrow=c(2,2))

acf(SunSpots)

acf(SunSpots, type="partial")

SunSpots.ar1 <- ar(SunSpots, order=2)

SunSpots.ar1$ar

 

 

 

 

 

webmaster: Eric A. Suess    Last Edited: Tuesday, April 25, 2000