# # Written by: # -- # John L. Weatherwax 2007-07-05 # # email: wax@alum.mit.edu # # Please send comments and especially bug reports to the # above email address. # #----- library(gbm) library(randomForest) library(DAAG) set.seed(0) source('../Chapter10/gen_eq_10_2_data.R') # code to generate the nested spheres data n_sims = 50 # the number of simulations to run p = 10 # the dimension the nested sphere is in # initialize storage to hold the test error according to each run: gbm_1_test_error = matrix( 0, nrow=n_sims, ncol=1 ) gbm_6_test_error = matrix( 0, nrow=n_sims, ncol=1 ) rf_1_test_error = matrix( 0, nrow=n_sims, ncol=1 ) rf_3_test_error = matrix( 0, nrow=n_sims, ncol=1 ) for( nsi in seq(1,n_sims) ){ if( nsi %% 5 == 0 ){ print(sprintf("Performing Simulation Number %5d; %5.2f %% done ... ", nsi, nsi/n_sims)) } # Get data for this simulation: # XTraining = gen_eq_10_2_data(N=2000,p=p); XTraining[ XTraining$Y==-1, p+1 ] = 0 # Map the response "-1" to the value of "0" (required format for the call to gbm): XTesting = gen_eq_10_2_data(N=10000,p=p); XTesting[ XTesting$Y==-1, p+1 ] = 0 # Fit the gradient boosting models: # gb_m = gbm( Y ~ ., data=XTraining, distribution='bernoulli', n.trees=2000, shrinkage=0.05, interaction.depth=1, verbose=FALSE ) Yhat = predict( gb_m, XTesting[,1:p], n.trees=2000 ) pcc = mean( ( ( Yhat <= 0 ) & ( XTesting[,p+1] == 0 ) ) | ( ( Yhat > 0 ) & ( XTesting[,p+1] == 1 ) ) ) gbm_1_test_error[nsi] = 1 - pcc gb_m = gbm( Y ~ ., data=XTraining, distribution='bernoulli', n.trees=2000, shrinkage=0.05, interaction.depth=6, verbose=FALSE ) Yhat = predict( gb_m, XTesting[,1:p], n.trees=2000 ) pcc = mean( ( ( Yhat <= 0 ) & ( XTesting[,p+1] == 0 ) ) | ( ( Yhat > 0 ) & ( XTesting[,p+1] == 1 ) ) ) gbm_6_test_error[nsi] = 1 - pcc # Fit the random forest models: # XTraining[,p+1] = factor(XTraining[,p+1]) XTesting[,p+1] = factor(XTesting[,p+1]) rf_m = randomForest( Y ~ ., data=XTraining, ntree=500, mtry=1 ) # nodesize? rf_1_test_error[nsi] = 1 - mean( predict( rf_m, XTesting[,1:p] ) == XTesting[,p+1] ) rf_m = randomForest( Y ~ ., data=XTraining, ntree=500, mtry=3 ) # nodesize? rf_3_test_error[nsi] = 1 - mean( predict( rf_m, XTesting[,1:p] ) == XTesting[,p+1] ) } # Group everything in a data frame for plotting: # DF = rbind( cbind( rf_1_test_error, rep( "RF-1", n_sims ) ), cbind( rf_3_test_error, rep( "RF-3", n_sims ) ), cbind( gbm_1_test_error, rep( "GBM-1", n_sims ) ), cbind( gbm_6_test_error, rep( "GBM-6", n_sims ) ) ) DF = as.data.frame(DF, stringsAsFactors=FALSE) colnames( DF ) = c("error_rate","method") DF$error_rate = as.double(DF$error_rate) DF$method = as.factor( DF$method ) #postscript("../../WriteUp/Graphics/Chapter15/dup_fig_15_2.eps", onefile=FALSE, horizontal=FALSE) bwplot( method ~ error_rate, data=DF, xlab="test error rate", ylab="learning method" ) #dev.off()