source('../../Data/data_loaders.R')

DF = load_appendix_cement_data()

null = lm( Y ~ 1, data=DF )
full = lm( Y ~ ., data=DF )

# Forward selection:
#
step( null, scope=list(lower=null, upper=full), direction="forward" )

# Backward selection:
#
step( full, direction="backward" )

# Both directions:
#
step( null, scope=list(upper=full), direction="both" )

# Now try the leaps package:
#
if( ! require("leaps") ){
  install.packages("leaps")
  library("leaps")
}

rss = regsubsets( Y ~ ., data=DF )
print( summary( rss ) )

# Now use the routine cv_all_subsets.R:
#
# http://www.waxworksmath.com/Authors/G_M/Hastie/Code/Chapter3/cv_all_subsets.R
#
# Code here is modeled after the code found at:
#
# http://www.waxworksmath.com/Authors/G_M/Hastie/Code/Chapter3/dup_OSE_all_subsets.R
#
source('../../../../G_M/Hastie/Code/Chapter3/cv_all_subsets.R')
source('../../../../G_M/Hastie/Code/Chapter3/one_standard_error_rule.R')

XTraining = DF

p        = dim(XTraining)[2] - 1 # the last column is the response
nSamples = dim(XTraining)[1]

# Do best-subset cross validation:
#
for( k in 0:p ){
  print(sprintf('Looking for optimal subsets with %d predictors', k))
  res = cv_all_subsets( k, XTraining, numberOfCV=2 )
  if( k==0 ){
    complexityParam      = k
    cvResults            = res[[1]]
    bestPredictorFormula = res[[3]]
  }else{
    complexityParam      = cbind( complexityParam, k )
    cvResults            = cbind( cvResults, res[[1]] ) # each column is a different value of the complexity parameter ...
                                                        # each row is a different cross validation sample ...
    bestPredictorFormula = c( bestPredictorFormula, res[[3]] )
  }
}

# Group all cvResults by k (the subset size) and compute statistics:
#
OSE = one_standard_error_rule( complexityParam, cvResults )

complexVValue = OSE[[1]]
complexIndex  = OSE[[2]]
means         = OSE[[4]]
stds          = OSE[[5]]
oseHValue     = OSE[[3]]

if( ! require("gplots") ){ # plotCI, plotmeans found here
  install.packages("gplots")
  library("gplots")
}

#postscript("../../WriteUp/Graphics/Chapter4/ex_4_14_ci_plot.eps", onefile=FALSE, horizontal=FALSE)

plotCI( x=0:p, y=means, uiw=0.5*stds, col="black", barcol="blue", lwd=1, type="l",
        xlab="subset size (p)", ylab="expected squared prediction error (ESPE)" )
abline( h=oseHValue, lty=2 )
abline( v=complexVValue, lty=2 )

#dev.off()
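
# For reference, a minimal sketch (a hypothetical helper, *not* the sourced
# one_standard_error_rule.R routine) of what the one-standard-error rule above
# computes: for each subset size take the mean and spread of the CV fold
# errors, locate the size with the smallest mean error, and then choose the
# simplest model whose mean error lies within one standard error of that
# minimum.  Whether the sourced routine uses the standard deviation or the
# standard error of the fold errors is an assumption here.
#
one_se_rule_sketch = function( ks, cvErrors ){ # cvErrors: rows = CV folds; columns = subset sizes ks
  mns = apply( cvErrors, 2, mean )
  ses = apply( cvErrors, 2, sd ) / sqrt( nrow(cvErrors) ) # standard error of each column
  iMin      = which.min( mns )                 # subset size with the smallest estimated error
  threshold = mns[iMin] + ses[iMin]            # one standard error above that minimum
  iChosen   = min( which( mns <= threshold ) ) # simplest model under the threshold (ks assumed increasing)
  list( chosenK=ks[iChosen], chosenIndex=iChosen, threshold=threshold, means=mns, stds=ses )
}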