#
# Written by:
# -- 
# John L. Weatherwax                2009-04-21
# 
# email: wax@alum.mit.edu
# 
# Please send comments and especially bug reports to the
# above email address.
# 
# EPage 395
#
# See EPage 52 for an example of the dummyVars function
#
#-----

# Presumably a switch for writing figures to disk; it is not referenced in
# the visible part of this script.  Spelled out as FALSE because the
# shorthand `F` is an ordinary variable that can be reassigned.
save_plots <- FALSE

# EPage 311
#
library(caret)
library(AppliedPredictiveModeling) # needed for the hepatic dataset
data(hepatic)

# Fix the RNG state so that the fold assignment below is reproducible.
set.seed(714)

# Resampling setup: folds are built on the outcome `injury` (k = 10 is the
# createFolds default, written out here) and returned as the row indices of
# each training portion; trainControl() wraps them in a cross-validation
# control object.
indx <- createFolds(y = injury, k = 10, returnTrain = TRUE)
ctr <- trainControl(method = "cv", index = indx)

# Candidate values for the mtry tuning parameter.
mtryValues <- c(5, 10, 25, 50, 75, 100)

# The rfCART model:
#
# trControl = ctr makes train() resample with the cross-validation folds
# stored in `ctr`.  Without it train() falls back to its default resampling
# scheme and the folds prepared for this comparison are never used.
set.seed(1234)
rfCART <- train(
  chem, injury,
  method = "rf", metric = "Kappa", ntree = 1000,
  tuneGrid = data.frame(.mtry = mtryValues),
  trControl = ctr
)

# The predictions are made on the same data the model was fit to, so this
# confusion matrix is a training-set (resubstitution) summary.
y_hat <- predict(rfCART, chem)
rfCART.cm <- confusionMatrix(data = y_hat, reference = injury)

# The cforest model:
#
# trControl = ctr makes train() resample with the cross-validation folds
# stored in `ctr`.  Without it train() falls back to its default resampling
# scheme and the folds prepared for this comparison are never used.
set.seed(1234)
rfcForest <- train(
  chem, injury,
  method = "cforest", metric = "Kappa",
  tuneGrid = data.frame(.mtry = mtryValues),
  trControl = ctr
)

# The predictions are made on the same data the model was fit to, so this
# confusion matrix is a training-set (resubstitution) summary.
y_hat <- predict(rfcForest, chem)
rfcForest.cm <- confusionMatrix(data = y_hat, reference = injury)

# How do the two methods compare?  Kappa is looked up by name rather than by
# position, and each value is labelled with the model it came from.
kappas <- c(
  rfCART = rfCART.cm$overall["Kappa"],
  cforest = rfcForest.cm$overall["Kappa"]
)
print(kappas)

# What is the timing comparison between the two models:
print(rfCART$times$everything)
print(rfcForest$times$everything)

# What are the selected important variables?  Wrapped in print() so the
# results are also shown when the script is run through source(), which
# does not auto-print top-level values by default.
print(varImp(rfCART))
print(varImp(rfcForest))