#
# Written by:
# -- 
# John L. Weatherwax                2009-04-21
# 
# email: wax@alum.mit.edu
# 
# Please send comments and especially bug reports to the
# above email address.
# 
#-----

save_plots = FALSE

# PCA
#
yieldDat = read.table( '../../BookCode/Data/yields.txt', header=T )
maturity = c( (0:5), 5.5, 6.5, 7.5, 8.5, 9.5 )
pairs(yieldDat)
par(mfrow=c(4,3))
for( i in 0:11 ){
  plot( maturity, yieldDat[100*i+1,], type="b", main=sprintf("i=%d",i) )
}

eig = eigen( cov( yieldDat ) )
par(mfrow=c(1,1))
if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter18/eig_value_barplot.eps", onefile=FALSE, horizontal=FALSE) }
barplot( eig$values )
if( save_plots ){ dev.off() }

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter18/eig_vector_plots.eps", onefile=FALSE, horizontal=FALSE) }
par(mfrow=c(2,2))
plot( eig$vector[,1], ylim=c(-0.7,+0.7), type="b" )
abline(h=0)
plot( eig$vector[,2], ylim=c(-0.7,+0.7), type="b" )
abline(h=0)
plot( eig$vector[,3], ylim=c(-0.7,+0.7), type="b" )
abline(h=0)
plot( eig$vector[,4], ylim=c(-0.7,+0.7), type="b" )
abline(h=0)
if( save_plots ){ dev.off() }


# Problem 1-2:
#
if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter18/yield_timeseries_fixed_maturity.eps", onefile=FALSE, horizontal=FALSE) }
par(mfrow=c(2,2))
plot( yieldDat[,1], ylim=c(4.0,8.0), type="l" )
plot( yieldDat[,2], ylim=c(4.0,8.0), type="l" )
plot( yieldDat[,3], ylim=c(4.0,8.0), type="l" )
plot( yieldDat[,4], ylim=c(4.0,8.0), type="l" )
if( save_plots ){ dev.off() }

# Lets look at the Dickey-Fuller test:
#
library("tseries")
for( ii in 1:11 ){
  adf_test = adf.test( yieldDat[,ii] )
  print( sprintf( "column index= %2d; p_value= %10.6f", ii, adf_test$p.value ) )
}

# Problem 3:
#
n = dim(yieldDat)[1]
delta_yield = yieldDat[-1,] - yieldDat[-n,] 

# Plot yield differences: 
if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter18/delta_yield_timeseries_fixed_maturity.eps", onefile=FALSE, horizontal=FALSE) }
par(mfrow=c(2,2))
plot( delta_yield[,1], type="l" )
plot( delta_yield[,2], type="l" )
plot( delta_yield[,3], type="l" )
plot( delta_yield[,4], type="l" )
if( save_plots ){ dev.off() }

# Lets look at the Dickey-Fuller test:
#
for( ii in 1:11 ){
  adf_test = adf.test( delta_yield[,ii] )
  print( sprintf( "column index= %2d; p_value= %10.6f", ii, adf_test$p.value ) )
}

# Problem 4:
#
pca_del = princomp(delta_yield)
names(pca_del)
summary(pca_del)
plot(pca_del)

# Factor Models on Time Series (extracting the data range we need): 
#
FF_data = read.table("../../BookCode/Data/FamaFrenchDaily.txt",header=T)
start_index = which( FF_data$date == "20040102" )
end_index   = which( FF_data$date == "20051230" )
FF_data = FF_data[ start_index:end_index, ]
FF_data = FF_data[-1,] # delete first row since stocks_diff looses a row due to differencing 

stocks = read.csv("../../BookCode/Data/Stock_Bond_2004_to_2006.csv",header=T)
start_index = which( stocks$DATE ==  "1/2/2004" )
end_index   = which( stocks$DATE ==  "12/30/2005" )
stocks = stocks[ start_index:end_index, ] 
stocks_subset = stocks[, c('GM_AC','F_AC','UTX_AC','MRK_AC') ] # select a subset of each stocks adjusted closes
stocks_diff = as.data.frame( 100 * apply( log( stocks_subset ), 2, diff ) - FF_data$RF ) # compute each stocks excess return
names(stocks_diff) = c("GM","Ford","UTX","Merck")

# A fit based only on the market return: 
fit1 = lm( as.matrix(stocks_diff) ~ FF_data$Mkt.RF )
summary(fit1)

# Print the correlation between the residuals (observe how well the model assumptions match reality):
# 
cor( fit1$residuals ) 

for( i in 1:4 ){
  for( j in 1:4 ){
    if( j >= i ){ next  }
    tmp = cor.test( fit1$residuals[,i], fit1$residuals[,j] )
    print( sprintf("Correlation between %5s and %5s; (%10.6f, %10.6f, %10.6f)",
                   colnames(fit1$residuals)[i], colnames(fit1$residuals)[j],
                   tmp$conf.int[1], tmp$estimate, tmp$conf.int[2] ) )
  }
}

# The sample covariance is:
#
Sigma_R = cov( stocks_diff )

# Factor based covariance is:
#
# beta^T Sigma_F beta + Sigma_e
#
# thus to compute it we need to compute each term in the above decomposition
#
Sigma_F = var( FF_data$Mkt.RF ) # The only factor return is the market return r_{Market,t} (and is a scalar)

# The beta vector are the CAPM beta coefficients (ignore the constant term) as a ROW vector:
# transpose needed due to how as.matrix formats result
# 
beta = t( as.matrix( fit1$coefficients[2,] ) )

betaT_Times_Sigma_F_Times_beta = t(beta) %*% ( Sigma_F * beta ) 

Sigma_e = diag( diag( cov( fit1$residuals ) ), 4, 4 )

Sigma_R_hat = betaT_Times_Sigma_F_Times_beta + Sigma_e 

abs( Sigma_R - Sigma_R_hat )


# Using the Fama-French model:
#
fit2 = lm( as.matrix(stocks_diff) ~ FF_data$Mkt.RF + FF_data$SMB + FF_data$HML )
sfit2 = summary(fit2)

# Lets look at the p-values for the beta estimates:
#
# Extract the data "by hand" ... not sure how else to do this ...
#
sfit2$'Response GM'$coefficients
sfit2$'Response Ford'$coefficients
sfit2$'Response UTX'$coefficients 
sfit2$'Response Merck'$coefficients


# Look at the correlation of the residuals in the Fama-French model:
#
cor( fit2$residuals ) 

for( i in 1:4 ){
  for( j in 1:4 ){
    if( j >= i ){ next  }
    tmp = cor.test( fit2$residuals[,i], fit2$residuals[,j] )
    print( sprintf("Correlation between %5s and %5s; (%10.6f, %10.6f, %10.6f)",
                   colnames(fit2$residuals)[i], colnames(fit2$residuals)[j],
                   tmp$conf.int[1], tmp$estimate, tmp$conf.int[2] ) )
  }
}

if( FALSE ){
    # Extract the AIC and the BIC for linear regression:
    # 
    print( sprintf("AIC(fit1)= %10.6f; AIC(fit2)= %10.6f", AIC(fit1), AIC(fit2)) )
    n = dim(stocks_diff)[1]
    p_fit_1 = 1
    p_fit_2 = 3 
    print( sprintf("BIC(fit1)= %10.6f; BIC(fit2)= %10.6f", AIC(fit1)-2*(1+p_fit_1)+log(n)*(1+p_fit_1), AIC(fit2)-2*(1+p_fit_2)+log(n)*(1+p_fit_2)) )
}

# Extract the factor estimated covariance:
Sigma_F = cov( FF_data[ , c( 'Mkt.RF', 'SMB', 'HML' ) ] )

beta = as.matrix( fit2$coefficients[2:4,] )

betaT_Times_Sigma_F_Times_beta = t( beta ) %*% Sigma_F %*% beta 

Sigma_e = diag( diag( cov( fit2$residuals ) ), 4, 4 )

Sigma_R_hat = betaT_Times_Sigma_F_Times_beta + Sigma_e 

abs( Sigma_R - Sigma_R_hat )


# 
betas_1 = c( 0.5, 0.4, -0.1 )
sigma_e_1 = 23.0

betas_2 = c( 0.6, 0.15, 0.7 )
sigma_e_2 = 37.0

beta = matrix( c( betas_1, betas_2 ), nrow=3, ncol=2, byrow=F )

t(beta) %*% Sigma_F %*% beta + diag( c(sigma_e_1,sigma_e_2), nrow=2, ncol=2 )


# Problem 13
#
dat = read.csv("../../BookCode/Data/Stock_FX_Bond.csv")
stocks_ac = dat[,c(3,5,7,9,11,13,15,17)] 
n = length( stocks_ac[,1] )
stocks_returns = log( stocks_ac[-1,] / stocks_ac[-n,] )

fact = factanal( stocks_returns, factors=2, rotation='none' )
print(fact,cutoff=0.01)

if( FALSE ){
    # lets try more factors:
    #
    fact = factanal( stocks_returns, factors=4, rotation='none' )
    print(fact,cutoff=0.01)
}

loadings = matrix( as.numeric( loadings(fact) ), ncol=2 )
unique = as.numeric( fact$unique )
betas = t(loadings) # this is the format for the betas each stock is a column see page 460

Sigma_R_hat = t(betas) %*% betas + diag( unique, ncol=8 )
print(round(Sigma_R_hat, 4))