# # Computes the maximum likelihood ARIMA coefficients for the housing starts data set. # # Written by: # -- # John L. Weatherwax 2009-04-21 # # email: wax@alum.mit.edu # # Please send comments and especially bug reports to the # above email address. # #----- # get the data into an R vector: Y <- c( 52.149, 47.205, 82.150, 100.931, 98.408, 97.351, 96.489, 88.830, 80.876, 85.750, 72.351, 61.198 , 46.561, 50.361, 83.236, 94.343, 84.748, 79.828, 69.068, 69.362, 59.404, 53.530, 50.212, 37.973 , 40.157, 40.274, 66.592, 79.839, 87.341, 87.594, 82.344, 83.712, 78.194, 81.704, 69.088, 47.026 , 45.234, 55.431, 79.325, 97.983, 86.806, 81.424, 86.398, 82.522, 80.078, 85.560, 64.819, 53.847 , 51.300, 47.909, 71.941, 84.982, 91.301, 82.741, 73.523, 69.465, 71.504, 68.039, 55.069, 42.827 , 33.363, 41.367, 61.879, 73.835, 74.848, 83.007, 75.461, 77.291, 75.961, 79.393, 67.443, 69.041 , 54.856, 58.287, 91.584, 116.013, 115.627, 116.946, 107.747, 111.663, 102.149, 102.882, 92.904, 80.362 , 76.185, 76.306, 111.358, 119.840, 135.167, 131.870, 119.078, 131.324, 120.491, 116.990, 97.428, 73.195 , 77.105, 73.560, 105.136, 120.453, 131.643, 114.822, 114.746, 106.806, 84.504, 86.004, 70.488, 46.767 , 43.292, 57.593, 76.946, 102.237, 96.340, 99.318, 90.715, 79.782, 73.443, 69.460, 57.898, 41.041 ) Y_hold_out <- c( 39.791, 39.959, 62.498, 77.777, 92.782, 90.284, 92.782, 90.655, 84.517, 93.826, 71.646, 55.650 ) n <- length(Y) # we plot the original data: postscript("../../WriteUp/Graphics/Chapter6/month_housing_data_plt.eps", onefile=FALSE, horizontal=FALSE) plot(1:n, Y, type="l", xlab="time", ylab="housing starts") dev.off() # consider the autocorrelation function directly postscript("../../WriteUp/Graphics/Chapter6/month_housing_data_direct_sacf.eps", onefile=FALSE, horizontal=FALSE) acf(Y) dev.off() # take the first seasonal difference to make the data stationary Yd <- diff(Y,lag=12,differences=1) nd <- length(Yd) plot(1:nd, Yd, type="l", xlab="time", ylab="first seasonal difference of housing starts") # plot the autocorrelation function of this first seasonal difference postscript("../../WriteUp/Graphics/Chapter6/month_housing_data_d12_sacf.eps", onefile=FALSE, horizontal=FALSE) acf(Yd) dev.off() # take a single difference of this series Yd2 <- diff(Yd,lag=1,differences=1) nd <- length(Yd2) plot(1:nd, Yd2, type="l", xlab="time", ylab="first seasonal difference of housing starts") # plot this series autcorrelation function postscript("../../WriteUp/Graphics/Chapter6/month_housing_data_dd12_sacf.eps", onefile=FALSE, horizontal=FALSE) acf(Yd2) dev.off() # based on the sample autocorrelation above lets assume a MA(1) and a SMA(1) model of Yd2 # this is a SARIMA (0,1,1)(0,1,1)_{12} model on Y # or a SARIMA (0,0,1)(0,0,1)_{12} model on Yd2 # fit <- arima( Yd2, order=c(0,0,1), seasonal=list(order=c(0,0,1),period=12), include.mean=TRUE, method="ML" ) print(fit) # drop the insignificant mean term # fit <- arima( Yd2, order=c(0,0,1), seasonal=list(order=c(0,0,1),period=12), include.mean=FALSE, method="ML" ) print(fit) # let arima do all of the work itself: # fit <- arima( Y, order=c(0,1,1), seasonal=list(order=c(0,1,1),period=12), include.mean=TRUE, method="ML" ) print(fit) # lets look at the residuals postscript("../../WriteUp/Graphics/Chapter6/month_housing_data_res_sacf.eps", onefile=FALSE, horizontal=FALSE) acf(residuals(fit)) dev.off() # to compute the forecasts we'll let R do the differences internally in the arima function: # fit <- arima( Y, order=c(0,1,1), seasonal=list(order=c(0,1,1),period=12), method="ML" ) print(fit) predict(fit, n.ahead=length(Y_hold_out) )