# Duplicate part of Figure 3.7 (the lasso regularization results) from
# Chapter 3 of the book ESLII
#
# Written by:
# --
# John L. Weatherwax 2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
#-----

source("load_prostate_data.R")

# Read in unscaled data; PD[[1]] is the training set, PD[[2]] the test set,
# each with the response in the last column.
PD <- load_prostate_data(globalScale = FALSE, trainingScale = TRUE, responseScale = TRUE)
XTraining <- PD[[1]]
XTesting <- PD[[2]]

p <- dim(XTraining)[2] - 1  # the last column is the response
nSamples <- dim(XTraining)[1]

library(glmnet)

# Fit the full lasso regularization path (alpha = 1 => lasso).
fit <- glmnet(as.matrix(XTraining[, 1:p]), XTraining[, p + 1],
              family = "gaussian", alpha = 1)
# alpha = 0 => ridge
# fit <- glmnet(as.matrix(XTraining[, 1:p]), XTraining[, p + 1], family = "gaussian", alpha = 0)

postscript("../../WriteUp/Graphics/Chapter3/fig_3_10_dup.eps",
           onefile = FALSE, horizontal = FALSE)
plot(fit)
dev.off()

# Do cross-validation to get the optimal value of lambda.
cvob <- cv.glmnet(as.matrix(XTraining[, 1:p]), XTraining[, p + 1],
                  family = "gaussian", alpha = 1)

postscript("../../WriteUp/Graphics/Chapter3/dup_fig_3_7_lasso.eps",
           onefile = FALSE, horizontal = FALSE)
plot(cvob)
dev.off()

# Get the optimal value of lambda: lambda.1se is the largest lambda whose
# CV error is within one standard error of the minimum (one-SE rule).
lambdaOptimal <- cvob$lambda.1se

# Refit with this optimal value of lambda (single-lambda fit, used here only
# to display the selected coefficients):
fitOpt <- glmnet(as.matrix(XTraining[, 1:p]), XTraining[, p + 1],
                 family = "gaussian", lambda = lambdaOptimal, alpha = 1)
print(coef(fitOpt), digits = 3)  # was `digit=` (silent partial match for `digits`)

# Predict the testing data using this value of lambda.  Note we predict from
# the full-path object `fit` at s = lambdaOptimal, which is the usage the
# glmnet documentation recommends over predicting from a single-lambda refit.
yPredict <- predict(fit, newx = as.matrix(XTesting[, 1:p]), s = lambdaOptimal)

NTest <- dim(XTesting[, 1:p])[1]

# Test-set mean squared error, and the standard error of that mean
# (sample variance of the squared residuals divided by the test-set size).
print(mean((XTesting[, p + 1] - yPredict)^2), digits = 3)
print(sqrt(var((XTesting[, p + 1] - yPredict)^2) / NTest), digits = 3)