# Written by:
# --
# John L. Weatherwax 2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the above email address.
#
# EPage 395
#
# See EPage 52 for an example of the dummyVars function
#
#-----

save_plots <- FALSE

# EPage 311
library(caret)
library(AppliedPredictiveModeling)
library(pROC)
library(C50)  # needed for the churn dataset

data(churn)  # loads churnTrain & churnTest

X_grouped_categories <- churnTrain[, -20]  # drop the churn response
y <- churnTrain[, 20]

# Look for (and drop) zero variance columns (only column 6 is dropped).
# Guard the subset: if nearZeroVar() returned integer(0), the unguarded
# X[, -zv_cols] would silently drop EVERY column.
zv_cols <- nearZeroVar(X_grouped_categories)
if (length(zv_cols) > 0) {
  X_grouped_categories <- X_grouped_categories[, -zv_cols]
}

# Convert the grouped predictors into binary (one vs. all) predictors using
# the caret function "dummyVars":
grouped_predictors_formula <- paste0(
  "~", paste(colnames(X_grouped_categories), collapse = " + ")
)
grouped_to_one_vs_all <- dummyVars(grouped_predictors_formula,
                                   data = X_grouped_categories)
X_OVA_categories <- predict(grouped_to_one_vs_all,
                            newdata = X_grouped_categories)  # OVA = one vs. all

# Set up the train control arguments so that we can compute the
# area-under-the-curve (twoClassSummary needs class probabilities):
ctrl <- trainControl(summaryFunction = twoClassSummary, classProbs = TRUE)

# Fit one caret model and score it on the training predictors.
#
# Every model in this exercise follows the identical recipe: seed, train with
# metric="ROC" under `ctrl`, predict class probabilities on the training data,
# then build the training-set ROC curve.  This helper removes that 10-fold
# copy-paste.
#
# Args:
#   x:      predictor data.frame or matrix.
#   y:      two-level factor response ("yes"/"no").
#   method: caret method string (e.g. "rpart", "rf", "gbm").
#   ...:    forwarded to caret::train (tuneLength, tuneGrid, verbose, ...).
#
# Returns: list(classifier, predictions, rocCurve, auc).
fit_and_score <- function(x, y, method, ...) {
  set.seed(345)  # identical seed for every model so resampling folds agree
  classifier <- train(x, y, method = method, metric = "ROC",
                      trControl = ctrl, ...)
  predictions <- predict(classifier, x, type = "prob")
  # Column 1 is the probability of the first factor level ("yes" = churn).
  rocCurve <- pROC::roc(response = y, predictor = predictions[, 1])
  list(classifier = classifier, predictions = predictions,
       rocCurve = rocCurve, auc = rocCurve$auc[1])
}

# Part (a): Build some basic trees on the churn data:
#
# Grouped categories vs. one-vs-all categories for all factors:
rpart.grouped <- fit_and_score(X_grouped_categories, y, "rpart",
                               tuneLength = 30)
rpart.OVA <- fit_and_score(X_OVA_categories, y, "rpart", tuneLength = 30)

# How do these two techniques compare:
print(c(rpart.grouped$auc, rpart.OVA$auc))

# Part (b): Try bagging (a random forest with mtry = all predictors is a
# bagged tree ensemble):
bagging.grouped <- fit_and_score(
  X_grouped_categories, y, "rf",
  tuneGrid = data.frame(.mtry = ncol(X_grouped_categories))
)
bagging.OVA <- fit_and_score(
  X_OVA_categories, y, "rf",
  tuneGrid = data.frame(.mtry = ncol(X_OVA_categories))
)

# How do the bagging techniques compare:
print(c(bagging.grouped$auc, bagging.OVA$auc))

# Part (b): Try boosting:
#
# GBM:
gbmGrid <- expand.grid(.interaction.depth = seq(1, 7, by = 2),
                       .n.trees = seq(100, 1000, by = 100),
                       .shrinkage = c(0.01, 0.05, 0.1))
gbm.grouped <- fit_and_score(X_grouped_categories, y, "gbm",
                             tuneGrid = gbmGrid, verbose = FALSE)
gbm.OVA <- fit_and_score(X_OVA_categories, y, "gbm",
                         tuneGrid = gbmGrid, verbose = FALSE)

# ADA:
ada.grouped <- fit_and_score(X_grouped_categories, y, "ada", verbose = FALSE)
ada.OVA <- fit_and_score(X_OVA_categories, y, "ada", verbose = FALSE)

# C5.0:
cfive.grouped <- fit_and_score(X_grouped_categories, y, "C5.0")
cfive.OVA <- fit_and_score(X_OVA_categories, y, "C5.0")

# How do the boosting techniques compare (ordered by performance):
auc_results <- data.frame(
  name = c("GBM_grouped", "GBM_ova", "ADA_grouped", "ADA_ova",
           "C5.0_grouped", "C5.0_ova"),
  auc = c(gbm.grouped$auc, gbm.OVA$auc, ada.grouped$auc, ada.OVA$auc,
          cfive.grouped$auc, cfive.OVA$auc)
)
auc_results <- auc_results[order(auc_results$auc), ]
print("AUC Performance")
print(auc_results)

# Part (c): Try some rule-based methods:
#
# C5.0Rules:
cfiveRules.grouped <- fit_and_score(X_grouped_categories, y, "C5.0Rules")
cfiveRules.OVA <- fit_and_score(X_OVA_categories, y, "C5.0Rules")

# How do the rule-based methods compare:
print(c(cfiveRules.grouped$auc, cfiveRules.OVA$auc))
summary(cfiveRules.grouped$classifier)

# Part (d): Compare the methods:
#
# For the best models (bagging OVA / C5.0Rules grouped), what are the most
# important predictors:
varImp(bagging.OVA$classifier)
varImp(cfiveRules.grouped$classifier)

# Plot the best ROC curves:
if (save_plots) {
  postscript("../../WriteUp/Graphics/Chapter14/chap_14_prob_2_ROC_curves.eps",
             onefile = FALSE, horizontal = FALSE)
}
plot(rpart.OVA$rocCurve, legacy.axes = TRUE, add = FALSE, col = "gray")
plot(bagging.grouped$rocCurve, legacy.axes = TRUE, add = TRUE, col = "gray")
plot(cfive.grouped$rocCurve, legacy.axes = TRUE, add = TRUE, col = "black")
if (save_plots) {
  dev.off()
}

# Plot the lift curve for this data (the "yes" column holds the churn
# probability; `y` is found in the calling environment):
lp <- lift(y ~ yes, data = cfive.grouped$predictions, class = "yes")
if (save_plots) {
  postscript("../../WriteUp/Graphics/Chapter14/chap_14_prob_2_lift_plot.eps",
             onefile = FALSE, horizontal = FALSE)
}
plot(lp, main = "The lift plot for the C5.0 (grouped) classifier")
if (save_plots) {
  dev.off()
}