# # Written by: # -- # John L. Weatherwax 2009-04-21 # # email: wax@alum.mit.edu # # Please send comments and especially bug reports to the # above email address. # #----- save_plots = FALSE # PCA # yieldDat = read.table( '../../BookCode/Data/yields.txt', header=T ) maturity = c( (0:5), 5.5, 6.5, 7.5, 8.5, 9.5 ) pairs(yieldDat) par(mfrow=c(4,3)) for( i in 0:11 ){ plot( maturity, yieldDat[100*i+1,], type="b", main=sprintf("i=%d",i) ) } eig = eigen( cov( yieldDat ) ) par(mfrow=c(1,1)) if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter18/eig_value_barplot.eps", onefile=FALSE, horizontal=FALSE) } barplot( eig$values ) if( save_plots ){ dev.off() } if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter18/eig_vector_plots.eps", onefile=FALSE, horizontal=FALSE) } par(mfrow=c(2,2)) plot( eig$vector[,1], ylim=c(-0.7,+0.7), type="b" ) abline(h=0) plot( eig$vector[,2], ylim=c(-0.7,+0.7), type="b" ) abline(h=0) plot( eig$vector[,3], ylim=c(-0.7,+0.7), type="b" ) abline(h=0) plot( eig$vector[,4], ylim=c(-0.7,+0.7), type="b" ) abline(h=0) if( save_plots ){ dev.off() } # Problem 1-2: # if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter18/yield_timeseries_fixed_maturity.eps", onefile=FALSE, horizontal=FALSE) } par(mfrow=c(2,2)) plot( yieldDat[,1], ylim=c(4.0,8.0), type="l" ) plot( yieldDat[,2], ylim=c(4.0,8.0), type="l" ) plot( yieldDat[,3], ylim=c(4.0,8.0), type="l" ) plot( yieldDat[,4], ylim=c(4.0,8.0), type="l" ) if( save_plots ){ dev.off() } # Lets look at the Dickey-Fuller test: # library("tseries") for( ii in 1:11 ){ adf_test = adf.test( yieldDat[,ii] ) print( sprintf( "column index= %2d; p_value= %10.6f", ii, adf_test$p.value ) ) } # Problem 3: # n = dim(yieldDat)[1] delta_yield = yieldDat[-1,] - yieldDat[-n,] # Plot yield differences: if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter18/delta_yield_timeseries_fixed_maturity.eps", onefile=FALSE, horizontal=FALSE) } par(mfrow=c(2,2)) plot( delta_yield[,1], type="l" ) plot( delta_yield[,2], type="l" ) plot( delta_yield[,3], type="l" ) plot( delta_yield[,4], type="l" ) if( save_plots ){ dev.off() } # Lets look at the Dickey-Fuller test: # for( ii in 1:11 ){ adf_test = adf.test( delta_yield[,ii] ) print( sprintf( "column index= %2d; p_value= %10.6f", ii, adf_test$p.value ) ) } # Problem 4: # pca_del = princomp(delta_yield) names(pca_del) summary(pca_del) plot(pca_del) # Factor Models on Time Series (extracting the data range we need): # FF_data = read.table("../../BookCode/Data/FamaFrenchDaily.txt",header=T) start_index = which( FF_data$date == "20040102" ) end_index = which( FF_data$date == "20051230" ) FF_data = FF_data[ start_index:end_index, ] FF_data = FF_data[-1,] # delete first row since stocks_diff looses a row due to differencing stocks = read.csv("../../BookCode/Data/Stock_Bond_2004_to_2006.csv",header=T) start_index = which( stocks$DATE == "1/2/2004" ) end_index = which( stocks$DATE == "12/30/2005" ) stocks = stocks[ start_index:end_index, ] stocks_subset = stocks[, c('GM_AC','F_AC','UTX_AC','MRK_AC') ] # select a subset of each stocks adjusted closes stocks_diff = as.data.frame( 100 * apply( log( stocks_subset ), 2, diff ) - FF_data$RF ) # compute each stocks excess return names(stocks_diff) = c("GM","Ford","UTX","Merck") # A fit based only on the market return: fit1 = lm( as.matrix(stocks_diff) ~ FF_data$Mkt.RF ) summary(fit1) # Print the correlation between the residuals (observe how well the model assumptions match reality): # cor( fit1$residuals ) for( i in 1:4 ){ for( j in 1:4 ){ if( j >= i ){ next } tmp = cor.test( fit1$residuals[,i], fit1$residuals[,j] ) print( sprintf("Correlation between %5s and %5s; (%10.6f, %10.6f, %10.6f)", colnames(fit1$residuals)[i], colnames(fit1$residuals)[j], tmp$conf.int[1], tmp$estimate, tmp$conf.int[2] ) ) } } # The sample covariance is: # Sigma_R = cov( stocks_diff ) # Factor based covariance is: # # beta^T Sigma_F beta + Sigma_e # # thus to compute it we need to compute each term in the above decomposition # Sigma_F = var( FF_data$Mkt.RF ) # The only factor return is the market return r_{Market,t} (and is a scalar) # The beta vector are the CAPM beta coefficients (ignore the constant term) as a ROW vector: # transpose needed due to how as.matrix formats result # beta = t( as.matrix( fit1$coefficients[2,] ) ) betaT_Times_Sigma_F_Times_beta = t(beta) %*% ( Sigma_F * beta ) Sigma_e = diag( diag( cov( fit1$residuals ) ), 4, 4 ) Sigma_R_hat = betaT_Times_Sigma_F_Times_beta + Sigma_e abs( Sigma_R - Sigma_R_hat ) # Using the Fama-French model: # fit2 = lm( as.matrix(stocks_diff) ~ FF_data$Mkt.RF + FF_data$SMB + FF_data$HML ) sfit2 = summary(fit2) # Lets look at the p-values for the beta estimates: # # Extract the data "by hand" ... not sure how else to do this ... # sfit2$'Response GM'$coefficients sfit2$'Response Ford'$coefficients sfit2$'Response UTX'$coefficients sfit2$'Response Merck'$coefficients # Look at the correlation of the residuals in the Fama-French model: # cor( fit2$residuals ) for( i in 1:4 ){ for( j in 1:4 ){ if( j >= i ){ next } tmp = cor.test( fit2$residuals[,i], fit2$residuals[,j] ) print( sprintf("Correlation between %5s and %5s; (%10.6f, %10.6f, %10.6f)", colnames(fit2$residuals)[i], colnames(fit2$residuals)[j], tmp$conf.int[1], tmp$estimate, tmp$conf.int[2] ) ) } } if( FALSE ){ # Extract the AIC and the BIC for linear regression: # print( sprintf("AIC(fit1)= %10.6f; AIC(fit2)= %10.6f", AIC(fit1), AIC(fit2)) ) n = dim(stocks_diff)[1] p_fit_1 = 1 p_fit_2 = 3 print( sprintf("BIC(fit1)= %10.6f; BIC(fit2)= %10.6f", AIC(fit1)-2*(1+p_fit_1)+log(n)*(1+p_fit_1), AIC(fit2)-2*(1+p_fit_2)+log(n)*(1+p_fit_2)) ) } # Extract the factor estimated covariance: Sigma_F = cov( FF_data[ , c( 'Mkt.RF', 'SMB', 'HML' ) ] ) beta = as.matrix( fit2$coefficients[2:4,] ) betaT_Times_Sigma_F_Times_beta = t( beta ) %*% Sigma_F %*% beta Sigma_e = diag( diag( cov( fit2$residuals ) ), 4, 4 ) Sigma_R_hat = betaT_Times_Sigma_F_Times_beta + Sigma_e abs( Sigma_R - Sigma_R_hat ) # betas_1 = c( 0.5, 0.4, -0.1 ) sigma_e_1 = 23.0 betas_2 = c( 0.6, 0.15, 0.7 ) sigma_e_2 = 37.0 beta = matrix( c( betas_1, betas_2 ), nrow=3, ncol=2, byrow=F ) t(beta) %*% Sigma_F %*% beta + diag( c(sigma_e_1,sigma_e_2), nrow=2, ncol=2 ) # Problem 13 # dat = read.csv("../../BookCode/Data/Stock_FX_Bond.csv") stocks_ac = dat[,c(3,5,7,9,11,13,15,17)] n = length( stocks_ac[,1] ) stocks_returns = log( stocks_ac[-1,] / stocks_ac[-n,] ) fact = factanal( stocks_returns, factors=2, rotation='none' ) print(fact,cutoff=0.01) if( FALSE ){ # lets try more factors: # fact = factanal( stocks_returns, factors=4, rotation='none' ) print(fact,cutoff=0.01) } loadings = matrix( as.numeric( loadings(fact) ), ncol=2 ) unique = as.numeric( fact$unique ) betas = t(loadings) # this is the format for the betas each stock is a column see page 460 Sigma_R_hat = t(betas) %*% betas + diag( unique, ncol=8 ) print(round(Sigma_R_hat, 4))