#
# EPage 205
# Data from Appendix D, Table 12 EPage 724
#
# Exercise script: collinearity diagnostics and principal-component (PC)
# regression on the academy body-fat data.  Part (a) inspects the usual
# collinearity indicators; Part (b) compares variable deletion (stepwise)
# with PC regression done three ways: by hand, via pcr_wwx, and via the
# correlation matrix (Eq. 5.29 in the book).
#
library(car)

save_figs <- FALSE

source('../../Data/data_loaders.R')

DF <- load_appendix_academy_bodyfat_data()
m <- 9  # number of predictor columns; column m+1 of DF is the response FAT

#pairs(DF)
scatterplotMatrix(DF)
print(summary(DF))

# Full ordinary-least-squares fit on all predictors:
m_full <- lm(FAT ~ ., data=DF)
print(summary(m_full))

#
# Part (a):
#
# Examine the colinearity indicators:
#
C <- cor(DF)
print(C)  # notice no r_ij > 0.95

print(vif(m_full))  # only one value > 10

# Eigen-decomposition of the predictor correlation matrix (predictors only):
ES <- eigen(C[1:m, 1:m])
print('Eigenvalues:')
print(ES$values)  # no eigenvalue is less than 0.05

#
# Part (b):
#
# Variable deletion:
#
step(m_full)

# PC regression:
#
# Compute the principal components (and a scatter plot of them):
#
DFs <- scale(DF)  # standardize every column (including FAT, for now)
P <- ES$vectors
Z <- as.matrix(DFs[, 1:m]) %*% P  # principal-component scores of the predictors
colnames(Z) <- sprintf('Z%d', seq_len(m))
print(summary(Z))

if (save_figs) {
  postscript("../../WriteUp/Graphics/Chapter5/ex_5_9_scatter_plot_of_Z.eps", onefile=FALSE, horizontal=FALSE)
}
pairs(Z, xlim=c(-3, +3), ylim=c(-3, +3))
if (save_figs) {
  dev.off()
}

# Do PC regression 'by hand':
#
# (we will drop the last principal component and display the model that we obtain):
#
DF_Z <- data.frame(Z)
fat_mean <- mean(DF$FAT)
DF_Z$FAT <- DF$FAT - fat_mean  # demeaned response

# Regress the demeaned response on the PC scores (no intercept needed):
pc_m <- lm(FAT ~ . - 1, data=DF_Z)
print(summary(pc_m))

gamma_hat <- coefficients(pc_m)  # what are the coefficients of the PC regression
gamma_hat_modified <- gamma_hat
gamma_hat_modified[length(gamma_hat_modified)] <- 0.0  # set the coefficient of the last PC equal to zero
beta_PC <- P %*% gamma_hat_modified  # what are the beta values for this principal component model

# Render the fitted model as a human-readable equation string:
rhs <- paste(sprintf('%+.3f %s', beta_PC, colnames(DF)[1:m]), sep='', collapse=' ')
print('PC model: (dropping last component by hand)')
print(sprintf('FAT = %+.3f %s', fat_mean, rhs))

# Do PC regression using an already written function (to double check the above results):
#
#source('../../../Hastie/Code/Chapter3/pcr_wwx.R')
# WARNING(review): sourcing code over plain HTTP executes unauthenticated
# remote code.  Prefer the local copy (commented out above) when available.
source('http://www.waxworksmath.com/Authors/G_M/Hastie/Code/Chapter3/pcr_wwx.R')
res <- pcr_wwx(DF, DF)

#
# Lets verify the outputs from pcr_wwx match what we expect:
#
# 1) The coefficients computed 'by hand' above (dropping the eigenvector with the smallest eigenvalue)
#
print('PC verification 1: coefficients by hand')
print(as.vector(t(beta_PC)))
print('PC verification 1: coefficients from pcr_wwx output')
print(res[[1]][, m-1])  # column m-1 = model keeping the first m-1 components

# As another verification we will do PC regression using the correlation matrix
# to verify that we understand how to get the same results as above with this method:
# See Eq. 5.29 in the book
#
XtX <- C[1:m, 1:m]  # predictor-predictor correlations
Xty <- C[1:m, m+1]  # predictor-response correlations
beta_c <- solve(XtX, Xty)  # standardized OLS coefficients

ES <- eigen(XtX)
#print('Eigenvalues')
#print(round(ES$values, 3))
P <- ES$vectors
#print('Eigenvectors')
#print(round(P, 3))

PT_times_beta_s <- t(P) %*% beta_c
PT_times_beta_s[m] <- 0.0  # [ I_k, 0; 0, O ] dropping the eigenvector with the smallest eigenvalue
# Rotate back and rescale by sd(FAT) since beta_c is in standardized-y units:
beta_pc <- (P %*% PT_times_beta_s) * sd(DF$FAT)
print('PC verification 1: coefficients from correlation matrix')
print(as.vector(beta_pc))

# 2) comparing the OLS coefficients:
#
DFs <- data.frame(DFs)  # scaled inputs
DFs$FAT <- DF$FAT - fat_mean  # demean the response (overwrites the scaled FAT column)
m_full_2 <- lm(FAT ~ . - 1, data=DFs)
print('PC verification 2: OLS coefficients')
print(as.vector(coefficients(m_full_2)))
print('PC verification 2: pcr_wwx output')
print(res[[1]][, m])  # column m = model keeping all m components (= OLS)