#
# EPage 374
#
# Written by:
# -- 
# John L. Weatherwax                2009-04-21
# 
# email: wax@alum.mit.edu
# 
# Please send comments and especially bug reports to the
# above email address.
# 
#-----

# When TRUE, the figure produced below is written to an EPS file instead of
# being drawn on the active graphics device.  Spelled out as TRUE because the
# shorthand `T` is an ordinary variable that can be reassigned.
save_plots <- TRUE

library(DAAG)
library(MASS)
library(rpart)
library(randomForest)

# Fix the RNG state so the forests grown below are reproducible.
set.seed(0)

# Count the missing values in every column of the biopsy data.
vapply(biopsy, function(column) sum(is.na(column)), integer(1))

# Drop the first column and every row that contains a missing value.
biops <- na.omit(biopsy[, -1])

# Fit one forest on the complete data and list the components of the fit.
rf <- randomForest(class ~ ., data = biops)
names(rf)

# Part (a): grow ten forests on the same data to see how much the
# out-of-bag error and the confusion matrix vary from run to run.
#
for (i in seq_len(10)) {
  biops.rf <- randomForest(class ~ ., data = biops)
  # Average of the "OOB" column of the fit's err.rate matrix.
  OOBerr <- mean(biops.rf$err.rate[, "OOB"])
  print(paste0(i, ": ", round(OOBerr, 4)))
  print(round(biops.rf$confusion, 4))
}

# Part (b): compare OOB error with held-out test error.
#
# Each simulation splits the data in half at random, trains a forest on one
# half and scores it on the other, recording both error estimates.
#
n_sims <- 500

# Size the result vectors from n_sims.  They were previously allocated with a
# hard-coded length of 50 (as 50x1 matrices), so every write past index 50
# silently regrew the object and dropped its dim attribute.
oobErr_all <- numeric(n_sims)
testErr_all <- numeric(n_sims)

n_obs <- nrow(biops)
for (i in seq_len(n_sims)) {
  # Random half of the rows for training; the remainder is the test set.
  trRows <- sample(seq_len(n_obs), size = round(n_obs / 2))

  # Column 10 of biops is passed as the test response (ytest); the other
  # columns are passed as the test predictors (xtest).
  biops.rf <- randomForest(
    class ~ .,
    data = biops[trRows, ],
    xtest = biops[-trRows, -10],
    ytest = biops[-trRows, 10]
  )

  # NOTE(review): both figures are means over every row of the err.rate
  # matrices, not the value in the last row -- confirm this is the intended
  # summary.
  oobErr <- mean(biops.rf$err.rate[, "OOB"])
  testErr <- mean(biops.rf$test$err.rate[, "Test"])
  print(round(c(oobErr, testErr), 4))

  oobErr_all[i] <- oobErr
  testErr_all[i] <- testErr
}

# Scatter the test error against the OOB error for every simulation, with the
# line y = x for reference.  When save_plots is set the figure goes to an EPS
# file, and the device is closed afterwards.
if (save_plots) {
  postscript(
    "../../WriteUp/Graphics/Chapter11/prob_5_plot.eps",
    onefile = FALSE,
    horizontal = FALSE
  )
}
plot(
  oobErr_all, testErr_all,
  type = "p",
  xlab = "out of bag error",
  ylab = "test error"
)
abline(a = 0, b = 1)
if (save_plots) {
  dev.off()
}

# Part (c): use the training data itself as the "test" set.
#
rf <- randomForest(
  class ~ .,
  data = biops,
  xtest = biops[, -10],
  ytest = biops[, 10]
)

# Note that every entry of the test error matrix is zero.
rf$test$err.rate