cv_pcr_wwx <- function( D, numberOfCV=10 ){ # # Does Cross Validation of the Principal Component Regression (PCR) # # Input: # D = training data frame where the last column is the response variable # numberOfCV = number of cross validations to do # # Output: # MSPE = matrix of size [ numberOfCV \times ncomp ] of mean square prediction errors # extracted from each of the numberOfCV test data sets for each of the ncomp # # Written by: # -- # John L. Weatherwax 2009-04-21 # # email: wax@alum.mit.edu # # Please send comments and especially bug reports to the # above email address. # #----- nSamples = dim( D )[1] p = dim( D )[2] - 1 # the last column is the response responseName = names(D)[p+1] # the the name of the response # needed for the cross vaildation (CV) loops: # nCVTest = round( nSamples*(1/numberOfCV) ) # each CV run will have this many test points nCVTrain = nSamples-nCVTest # each CV run will have this many training points for (cvi in 1:numberOfCV) { testInds = (cvi-1)*nCVTest + 1:nCVTest # this may attempt to access sample indexes > nSamples testInds = intersect( testInds, 1:nSamples ) # so we restrict this list here nCVTest = length(testInds) DCVTest = D[testInds,1:(p+1)] # get the predictor + response testing data # select this cross validation's section of data from all the training data: # trainInds = setdiff( 1:nSamples, testInds ) nCVTrain = length(trainInds) DCV = D[trainInds,1:(p+1)] # get the predictor + response training data res = pcr_wwx( DCV, DCVTest ) pdt = res[[3]] # create a matrix from the testing response of size [ nCVTest, ncomp+1 ] responseTest = DCVTest[,p+1] responseTestAsMatrix = matrix( data=rep( responseTest, p+1 ), ncol=p+1 ) if( cvi==1 ){ predmat = pdt y = responseTestAsMatrix }else{ predmat = rbind( predmat, pdt ) y = rbind( y, responseTestAsMatrix ) } } # endfor cvi loop # this code is modeled after the code in "glmnet/cvelnet.R" # N = dim(y)[1] cvraw = ( y - predmat )^2 cvm = apply(cvraw,2,mean) cvsd = sqrt( apply(cvraw,2,var)/N ) l = list( cvraw=cvraw, cvm=cvm, cvsd=cvsd, name="Mean Squared Error" ) return( l ) }