# # Duplicate part of Figure 3.7 (the principal component regression results) from # Chapter 3 of the book ESLII # # Written by: # -- # John L. Weatherwax 2009-04-21 # # email: wax@alum.mit.edu # # Please send comments and especially bug reports to the # above email address. # #----- source("load_prostate_data.R") source("cv_PCR_N_PLSR.R") source("one_standard_error_rule.R") PD = load_prostate_data(globalScale=FALSE,trainingScale=TRUE,responseScale=TRUE) # read in unscaled data XTraining = PD[[1]] XTesting = PD[[2]] p = dim(XTraining)[2]-1 # the last column is the response nSamples = dim(XTraining)[1] # REG_CHOICE==1 (does principal component regression), ==2 (does partial least squares regression) REG_CHOICE=1 REG_CHOICE=2 cvResults = cv_PCR_N_PLSR(XTraining,numberOfCV=10, REG_CHOICE) # does numberOfCV for each of the possible values of ncomp (here = p = 8) complexityParam = 0:p OSE = one_standard_error_rule( complexityParam, cvResults$cvraw ) complexVValue = OSE[[1]] complexIndex = OSE[[2]] oseHValue = OSE[[3]] means = OSE[[4]] # as a function of model complexity stds = OSE[[5]] # as a function of model complexity library(gplots) # plotCI, plotmeans found here if( TRUE && REG_CHOICE==1 ){ postscript("../../WriteUp/Graphics/Chapter3/dup_fig_3_7_principal_component_regression.eps", onefile=FALSE, horizontal=FALSE) }else{ postscript("../../WriteUp/Graphics/Chapter3/dup_fig_3_7_partial_least_squares.eps", onefile=FALSE, horizontal=FALSE) } plotCI( x=complexityParam, y=means, uiw=0.5*stds, xlim=c(0,8), ylim=c(0.3,1.8), col="black", barcol="blue", lwd=1, type="l", xlab="degrees of freedom", ylab="expected squared prediction error (ESPE)" ) abline( h=oseHValue, lty=2 ) abline( v=complexVValue, lty=2 ) dev.off() # the above model predict *TWO* degrees of freedom, so with that model we are going to use # do partial least squares regression on all of the training data: # PD = load_prostate_data(globalScale=FALSE,trainingScale=TRUE,responseScale=TRUE) # read in scaled data XTraining = PD[[1]] XTesting = PD[[2]] if( REG_CHOICE==1 ){ m1 = pcr( lpsa~lcavol+lweight+age+lbph+svi+lcp+gleason+pgg45, ncomp=complexVValue, data=XTraining, validation="none" ) }else{ m1 = plsr( lpsa~lcavol+lweight+age+lbph+svi+lcp+gleason+pgg45, ncomp=complexVValue, data=XTraining, validation="none" ) } print( coef(m1), digit=3 ) pdt = predict( m1, ncomp=complexVValue, newdata=XTesting ) pdt = as.matrix( pdt[,1,] ) lpsaTest = XTesting[,p+1] NTest = dim(XTesting)[1] mErr = mean( (lpsaTest - pdt)^2 ) print( mErr ) sErr = sqrt( var( (lpsaTest - pdt)^2 )/NTest ) print( sErr )