# # Written by: # -- # John L. Weatherwax 2009-04-21 # # email: wax@alum.mit.edu # # Please send comments and especially bug reports to the # above email address. # # EPage 138 # #----- if( ! require("ISLR") ){ install.packages("ISLR") } if( ! require("glmnet") ){ install.packages("glmnet") } if( ! require("pls") ){ install.packages("pls") } library(MASS) set.seed(0) n = dim(Boston)[1] p = dim(Boston)[2] train = sample(c(TRUE,FALSE), n, rep=TRUE) # will roughly assign TRUE to one-half of the data (FALSE to the other half). test = (!train) Boston_train = Boston[train,] Boston_test = Boston[test,] # The full linear model: # m = lm( crim ~ ., data=Boston_train ) Y_hat = predict( m, newdata=Boston_test ) MSE = mean( ( Boston_test$crim - Y_hat )^2 ) print( sprintf( "Linear model test MSE= %10.3f", MSE ) ) # Ridge regression: # Y = Boston_train$crim MM = model.matrix( crim ~ ., data=Boston_train ) cv.out = cv.glmnet( MM, Y, alpha=0 ) plot( cv.out ) bestlam = cv.out$lambda.1se #print( "ridge regression CV best value of lambda (one standard error)" ) #print( bestlam ) ridge.mod = glmnet( MM, Y, alpha=0 ) Y_hat = predict( ridge.mod, s=bestlam, newx=model.matrix( crim ~ ., data=Boston_test ) ) MSE =mean( ( Boston_test$crim - Y_hat )^2 ) print( sprintf( "Ridge regression test MSE= %10.3f", MSE ) ) # The Lasso: # cv.out = cv.glmnet( MM, Y, alpha=1 ) plot( cv.out ) bestlam = cv.out$lambda.1se #print( "lasso CV best value of lambda (one standard error)" ) #print( bestlam ) lasso.mod = glmnet( MM, Y, alpha=1 ) Y_hat = predict( lasso.mod, s=bestlam, newx=model.matrix( crim ~ ., data=Boston_test ) ) MSE =mean( ( Boston_test$crim - Y_hat )^2 ) print( sprintf( "Lasso regression test MSE= %10.3f", MSE ) ) print( "lasso coefficients" ) print( predict( lasso.mod, type="coefficients", s=bestlam ) ) # Principle Component Regression: # pcr.mod = pcr( crim ~ ., data=Boston_train, scale=TRUE, validation="CV" ) # Use this to select the number of components to include ... looks like CV suggests # we should use 3 predictors # validationplot( pcr.mod, val.type="MSEP" ) ncomp = 3 Y_hat = predict( pcr.mod, Boston_test, ncomp=ncomp ) MSE = mean( ( Boston_test$crim - Y_hat )^2 ) print( sprintf( "PCR (with ncomp= %5d) test MSE= %10.3f", ncomp, MSE ) ) # Paritial Least Squares: # pls.mod = plsr( crim ~ ., data=Boston_train, scale=TRUE, validation="CV" ) # Use this to select the number of components to include ... looks like CV suggests # the best is to use 5 predictors # validationplot( pls.mod, val.type="MSEP" ) ncomp=5 Y_hat = predict( pls.mod, Boston_test, ncomp=ncomp ) MSE = mean( ( Boston_test$crim - Y_hat )^2 ) print( sprintf( "PLS (with ncomp= %5d) test MSE= %10.3f", ncomp, MSE ) )