#
# EPage 374
#
# Written by:
# --
# John L. Weatherwax                2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
#-----
#
# Random forest experiments on the biopsy data set:
#   (a) refit the forest several times and look at the OOB error,
#   (b) compare OOB error with held-out test error over many random splits,
#   (c) "test" on the training data itself.

save_plots <- TRUE

library(DAAG)
library(MASS)
library(rpart)
library(randomForest)

set.seed(0)

# Count the missing values in each column before dropping incomplete rows.
vapply(biopsy, function(x) sum(is.na(x)), integer(1))

# Drop the first column of biopsy and every row that contains an NA.
biops <- na.omit(biopsy[, -1])

rf <- randomForest(class ~ ., data = biops)
names(rf)

# Part (a) generate several randomForest:
#
for (i in 1:10) {
  biops.rf <- randomForest(class ~ ., data = biops)
  # NOTE(review): err.rate has one row per tree, so this is the error
  # averaged over all forest sizes, not the error of the final forest.
  OOBerr <- mean(biops.rf$err.rate[, "OOB"])
  print(paste0(i, ": ", round(OOBerr, 4)))
  print(round(biops.rf$confusion, 4))
}

# Part (b) compare OOB accuracies with test set accuracies:
#
n_sims <- 500

# Size the result holders from n_sims so that every iteration writes into
# preallocated storage (a fixed size of 50 forced a regrow on every later
# iteration and silently dropped the matrix shape).
oobErr_all <- numeric(n_sims)
testErr_all <- numeric(n_sims)

n_obs <- nrow(biops)
n_train <- round(n_obs / 2)

for (i in seq_len(n_sims)) {
  # Random half of the rows is used for training, the remainder for testing.
  trRows <- sample(seq_len(n_obs), size = n_train)

  # Column 10 of biops is passed as the test response (ytest); the
  # remaining columns are the test predictors (xtest).
  biops.rf <- randomForest(
    class ~ .,
    data = biops[trRows, ],
    xtest = biops[-trRows, -10],
    ytest = biops[-trRows, 10]
  )

  oobErr <- mean(biops.rf$err.rate[, "OOB"])
  testErr <- mean(biops.rf$test$err.rate[, "Test"])
  print(round(c(oobErr, testErr), 4))

  oobErr_all[i] <- oobErr
  testErr_all[i] <- testErr
}

if (save_plots) {
  postscript("../../WriteUp/Graphics/Chapter11/prob_5_plot.eps",
             onefile = FALSE, horizontal = FALSE)
}
plot(oobErr_all, testErr_all, type = "p",
     xlab = "out of bag error", ylab = "test error")
abline(a = 0, b = 1)  # reference line: test error == OOB error
if (save_plots) {
  dev.off()
}

# Part (c):
#
# The "test" set here is the training data itself.
rf <- randomForest(class ~ ., data = biops,
                   xtest = biops[, -10], ytest = biops[, 10])
rf$test$err.rate  # note that everything is zero