#
# Written by:
# -- 
# John L. Weatherwax                2009-04-21
# 
# email: wax@alum.mit.edu
# 
# Please send comments and especially bug reports to the
# above email address.
# 
#-----

if( !require('forecast') ){
   install.packages('forecast', dependencies=TRUE, repos='http://cran.rstudio.com/')
}
library(forecast)

if( !require('Ecdat') ){
   install.packages('Ecdat', dependencies=TRUE, repos='http://cran.rstudio.com/')
}
library(Ecdat)

if( !require('sandwich') ){
   install.packages('sandwich', dependencies=TRUE, repos='http://cran.rstudio.com/')
}
library(sandwich)


save_plots = FALSE

set.seed(0) 

#
# Seasonal ARIMA Models:
#

# P1 & 2: EPage 278
# 
data(IncomeUK)
consumption = IncomeUK[,2] 

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter13/consumption_ts_plot.eps", onefile=FALSE, horizontal=FALSE) }
plot(consumption)
grid()
if( save_plots ){ dev.off() }

# Consider consumption on its own:
# 
if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter13/consumption_acfs.eps", onefile=FALSE, horizontal=FALSE) }
par(mfrow=c(1,4))
acf( consumption ) 
acf( diff(consumption,1) )
acf( diff(consumption,4) ) 
acf( diff(diff(consumption,1),4) ) 
par(mfrow=c(1,1))
if( save_plots ){ dev.off() }

# Consider the logarithm of consumption:
#
lconsumption = log(consumption)
if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter13/log_consumption_acfs.eps", onefile=FALSE, horizontal=FALSE) }
par(mfrow=c(1,4))
acf( lconsumption ) 
acf( diff(lconsumption,1) )
acf( diff(lconsumption,4) ) 
acf( diff(diff(lconsumption,1),4) ) 
par(mfrow=c(1,1))
if( save_plots ){ dev.off() }

# Fit the model that looks most likely:
# 
consumption_ts_model = arima( consumption, order=c(0,1,0), seasonal=list(order=c(0,1,1), period=4) )
Box.test( residuals(consumption_ts_model), lag=10, type="Ljung" ) 

log_consumption_ts_model = arima( lconsumption, order=c(0,1,0), seasonal=list(order=c(0,1,1), period=4) )
Box.test( residuals(log_consumption_ts_model), lag=10, type="Ljung" ) 

# P3: Epage 278
# 
acf( residuals(log_consumption_ts_model) )

# P4: EPage 278
# 
auto.arima( lconsumption, ic="bic" )

# P5: EPage 278
# 
consumption_prediction_m_1 = predict( consumption_ts_model, n.ahead=8 ) 
consumption_prediction_m_2 = predict( log_consumption_ts_model, n.ahead=8 )

m_1_pred = consumption_prediction_m_1$pred
m_1_ll = m_1_pred - 2*consumption_prediction_m_1$se
m_1_ul = m_1_pred + 2*consumption_prediction_m_1$se

m_2_pred = exp( consumption_prediction_m_2$pred )
m_2_ll = exp( consumption_prediction_m_2$pred - 2*consumption_prediction_m_2$se )
m_2_ul = exp( consumption_prediction_m_2$pred + 2*consumption_prediction_m_2$se )

plot_y_max = max( c( m_1_ul, m_2_ul ) )
plot_y_min = min( c( m_1_ll, m_2_ll ) )

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter13/consumption_predictions.eps", onefile=FALSE, horizontal=FALSE) }
plot( m_1_pred, col='red', ylim=c(plot_y_min,plot_y_max) )
points( m_1_ll, type='l', lty=2, col='red' )
points( m_1_ul, type='l', lty=2, col='red' ) 

points( m_2_pred, type='l', col='green' )
points( m_2_ll, type='l', lty=2, col='green' )
points( m_2_ul, type='l', lty=2, col='green' ) 

grid()
if( save_plots ){ dev.off() }

#
# Regression with HAC Standard Errors:
# 
data(Mishkin, package='Ecdat')
tb1_dif = diff(as.vector(Mishkin[, 3]))
tb3_dif = diff(as.vector(Mishkin[, 4]))
fit = lm(tb1_dif ~ tb3_dif)
print(round(summary(fit)$coef, 4))
acf(fit$resid)

library(sandwich)
sqrt(diag(NeweyWest(fit, lag=0, prewhite=F)))
print(round(coef(fit)/sqrt(diag(NeweyWest(fit, lag=0, prewhite=F))), 4)) # the t-values
for( lg in 1:3 ){
    print(lg)
    print(round(coef(fit)/sqrt(diag(NeweyWest(fit, lag=lg, prewhite=F))), 4))# the t-values at diferent lags
}

#
# Regression with ARMA Noise:
# 
library(AER)
data("USMacroG")
MacroDiff = as.data.frame(apply(USMacroG,2,diff))
attach(MacroDiff)

fit1 = arima( unemp, order=c(1,0,0), xreg=cbind(invest,government) )
fit1_resids = residuals( fit1 ) 

fit2 = lm( unemp ~ invest+government )
fit2_resids = residuals( fit2 ) 

print( sprintf("AIC(arima fit)= %10.3f; AIC(lm fit)= %10.3f", AIC(fit1), AIC(fit2)) )

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter13/arima_vs_lm.eps", onefile=FALSE, horizontal=FALSE) }

par(mfrow=c(1,2))
acf( fit1_resids, main="arima fit" )
acf( fit2_resids, main="lm fit" )

if( save_plots ){ dev.off() }

# Compute the BIC for each model:
n = length(unemp) # same for both models 

p = length(fit1$coef) - 1 # subtract one for the intercept
fit1_bic = fit1$aic + (log(n) - 2)*p

p = length(fit2$coef) - 1 # subtract one for the intercept
fit2_bic = AIC(fit2) + (log(n) - 2)*p

print( sprintf("BIC(arima fit)= %10.3f; BIC(lm fit)= %10.3f", fit1_bic, fit2_bic) )

# Lets try an AR(2) model:
#
fit3 = arima( unemp, order=c(2,0,0), xreg=cbind(invest,government) )
fit3_bic = fit3$aic + (log(n) - 2)*(length(fit3$coef) - 1)

fit4 = arima( unemp, order=c(1,0,1), xreg=cbind(invest,government) )
fit4_bic = fit4$aic + (log(n) - 2)*(length(fit4$coef) - 1)

print( sprintf("BIC(arima(2,0,0))= %10.3f; BIC(arima(1,0,1))= %10.3f;", fit3_bic, fit4_bic) )


#
# VAR Models:
#
TbGdpPi = read.csv('../../BookCode/Data/TbGdpPi.csv', header=TRUE)
# r = the 91-day Treasury bill rate
# y = the log of real GDP
# pi = the inflation rate
#
TbGdpPi = ts(TbGdpPi, start=1955, freq=4)
Tbrate = TbGdpPi
del_dat = diff(Tbrate)
var1 = ar(del_dat,order.max=4,aic=T)
var1

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter13/var_residual_acf.eps", onefile=FALSE, horizontal=FALSE) }
mask = complete.cases(var1$resid)
acf(var1$resid[mask, ])
if( save_plots ){ dev.off() }

# Predict the next time points:
# 
df = diff(tail(TbGdpPi, n=4), 1)
newdata = ts(df, start=c(2013, 2), end=c(2013, 4), frequency=4)
print(predict(var1, newdata=newdata))


var1 = ar(del_dat, order.max=1)

yn = var1$x.mean * 1.1; yn
newdata[3, ] = yn # put this row of data into a time series object

# predict for various look aheads:
#
P = predict(var1, newdata=newdata, n.ahead=5)
print(P)

print('predictions over x.mean')
print(P$pred / rbind(var1$x.mean, var1$x.mean, var1$x.mean, var1$xmean, var1$x.mean, var1$x.mean))

Phi_hat = var1$ar[1,,]
print(Phi_hat)

eigen.values = eigen(Phi_hat)$values
print(abs(eigen.values))


MacroVars = read.csv('../../BookCode/Data/MacroVars.csv', head=TRUE)

# fit using AIC to select the lag order:
#
fit = ar(MacroVars, aic=TRUE)
print(fit)

aic = fit$aic

# use BIC to select the lag order:
#
bic = fit$aic + (log(dim(MacroVars)[1]) - 2)*( 0:(length(fit$aic)-1) )

# Plot the AIC and the BIC:
#
plot(0:(length(fit$aic)-1), aic, type='b', col='red', pch=17, cex=1.25, xlab='order(p)', main='AIC and BIC')
lines(0:(length(fit$aic)-1), bic, type='b', col='blue', pch=19, cex=1.25)
legend('top', c('AIC','BIC'), col=c('red','blue'), lty=c(1, 1), pch=c(17, 19), pt.cex=2)
grid()

#
# Long-Memory Processes
# P8: EPage 279
# 
data(Mishkin,package="Ecdat")
cpi = as.vector(Mishkin[,5])
DiffSqrtCpi = diff(sqrt(cpi))

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter13/diff_sqrt_cpi_ts_N_acf.eps", onefile=FALSE, horizontal=FALSE) }
par(mfrow=c(1,2))
plot( DiffSqrtCpi, type='l' )
acf( DiffSqrtCpi )
par(mfrow=c(1,1))
if( save_plots ){ dev.off() }

library("fracdiff")
fit.frac = fracdiff(DiffSqrtCpi,nar=0,nma=0) # estimate the amount of fractional differencing to apply
fit.frac$d
fdiff = diffseries(DiffSqrtCpi,fit.frac$d)

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter13/frac_differenced_diff_sqrt_cpi_acf.eps", onefile=FALSE, horizontal=FALSE) }
acf(fdiff)
if( save_plots ){ dev.off() }

# This gives a ARIMA(0,1,1) model: 
auto.arima(fdiff,ic="aic")

# This gives a ARIMA(0,0,0) model: 
auto.arima(fdiff,d=0,ic="aic")


#
# Model-Based Bootstrapping of an ARIMA Process
#
library(AER)
data("FrozenJuice")
price = FrozenJuice[,1]
plot(price)
auto.arima(price,ic="bic")

# Generate bootstrap samples 
n = length(price)
sink("priceBootstrap.txt")
set.seed(1998852)
for( iter in 1:10 ){
  eps = rnorm(n+20)
  y = rep(0,n+20)
  for( t in 3:(n+20) ){
    y[t] = 0.2825 * y[t-1] + 0.057 * y[t-2] + eps[t]
  }
  y = y[101:n+20]
  y = cumsum(y)
  y = ts(y,frequency=12)
  fit = auto.arima(y,d=1,D=0,ic="bic")
  print(fit)
}
sink()


set.seed(1998852)
niter = 250
estimates = matrix(0,nrow=niter,ncol=2)
for( iter in 1:niter){
  eps = rnorm(n+20)
  y = rep(0,n+20)
  for( t in 3:(n+20) ){
    y[t] = 0.2825 * y[t-1] + 0.057 * y[t-2] + eps[t]
  }
  y = y[101:n+20]
  y = cumsum(y)
  t = ts(y,frequency=12)
  fit = arima(y,order=c(2,1,0))
  estimates[iter,] = fit$coef
}
# Compute the bias, standard deviations, and MSE  of the given estimate of the two AR(2) parameters: 
bias = c( 0.2825, 0.057 ) - apply( estimates, 2, mean )
std_devs = apply( estimates, 2, sd )
mses = std_devs / sqrt(dim(estimates)[1])
t(data.frame( bias, std_devs, mses ))