# # Runs the code "pcr_wwx.R" # # Written by: # -- # John L. Weatherwax 2010-03-02 # # email: wax@alum.mit.edu # # Please send comments and especially bug reports to the # above email address. # #----- source("load_prostate_data.R") source("pcr_wwx.R") source("cv_pcr_wwx.R") source("one_standard_error_rule.R") PD = load_prostate_data(globalScale=FALSE,trainingScale=FALSE,responseScale=FALSE) # read in unscaled data XTraining = PD[[1]] XTesting = PD[[2]] # this generates "statistics" on how well this method performd "out of sample" cvResults = cv_pcr_wwx(XTraining,numberOfCV=10) # does numberOfCV for each of the possible values of ncomp (here = p = 8) p = dim(XTraining)[2]-1 complexityParam = 0:p # this selects a model based on the "out of sample" variance OSE = one_standard_error_rule( complexityParam, cvResults$cvraw ) complexVValue = OSE[[1]] # potentially a continious value complexIndex = OSE[[2]] # the index that indicates the above "value" oseHValue = OSE[[3]] means = OSE[[4]] # as a function of model complexity stds = OSE[[5]] # as a function of model complexity # we can plot the results of our out of sample runs here: library(gplots) # plotCI, plotmeans found here plotCI( x=complexityParam, y=means, uiw=0.5*stds, xlim=c(0,8), ylim=c(0.3,1.8), col="black", barcol="blue", lwd=1, type="l", xlab="degrees of freedom", ylab="expected squared prediction error (ESPE)" ) abline( h=oseHValue, lty=2 ) abline( v=complexVValue, lty=2 ) # the above plot specifies *7* coefficients ... thus we want to retrain over the entire data # set and predict with this number of components # res = pcr_wwx( XTraining, XTesting ) yHat = res[[3]][,complexIndex] print( mean( (yHat - XTesting[,p+1])^2 ), digits=3 ) print( sqrt( var( (yHat - XTesting[,p+1])^2 )/dim(XTesting)[1] ), digits=3 )