library(caret)   # train(), trainControl(), twoClassSummary -- was missing; every model below needs it
library(pROC)    # roc() for the area-under-the-curve computation
library(kernlab) # sigest() for the SVM kernel-width estimate

build_AUC_nonlinear_models <- function(X, y, seed_value = 456, build_mda_model = TRUE) {
  #
  # Builds (using caret's train function) several of the nonlinear models discussed in
  # this chapter of the book, optimizing the area-under-the-curve (AUC) metric.
  #
  # X [n samples, n features] needs to have zero-variance columns removed (use
  #   nearZeroVar to remove them) and linearly dependent columns removed (use
  #   findLinearCombos to remove them).
  # y [n samples] is a two-level factor of outcomes with the FIRST level
  #   corresponding to the event of interest (column 1 of
  #   predict(..., type = "prob") is the probability of that level).
  # seed_value: RNG seed reset before each model fit so every model sees
  #   comparable resampling folds.
  # build_mda_model: MDA fitting is optional (it can fail on some data sets).
  #
  # Returns a named list with elements nnet, svm, knn, nb and (optionally) mda;
  # each element is a list(classifier=, roc=, auc=) holding the fitted caret
  # model, its training-set pROC ROC curve, and the corresponding AUC.
  #
  # Written by:
  # --
  # John L. Weatherwax 2009-04-21
  #
  # email: wax@alum.mit.edu
  #
  # Please send comments and especially bug reports to the above email address.
  #
  #-----

  # Set up the train control arguments so train() can optimize AUC ("ROC"):
  # twoClassSummary reports ROC/Sens/Spec and requires class probabilities.
  ctrl <- trainControl(summaryFunction = twoClassSummary, classProbs = TRUE)

  # Private helper: training-set ROC curve and AUC for a fitted classifier.
  # NOTE(review): pROC::roc() auto-detects 'direction' by default, which can
  # silently flip a curve so the AUC never drops below 0.5; predictions[, 1]
  # is the probability of the first factor level -- confirm levels(y) ordering
  # before trusting these in-sample AUCs.
  roc_summary <- function(classifier) {
    probs <- predict(classifier, X, type = "prob")
    curve <- pROC::roc(response = y, predictor = probs[, 1])
    list(classifier = classifier, roc = curve, auc = curve$auc[1])
  }

  # Mixture Discriminant Analysis (MDA):
  if (build_mda_model) {
    set.seed(seed_value)
    mda <- roc_summary(
      train(X, y, method = "mda",
            tuneGrid = expand.grid(subclasses = 1:3),
            metric = "ROC", trControl = ctrl)
    )
  }

  # Neural Networks (spatialSign tames outliers before the sigmoid units):
  set.seed(seed_value)
  nnetGrid <- expand.grid(size = 1:3, decay = c(0, 0.1, 1, 2))
  nnet <- roc_summary(
    train(X, y, method = "nnet",
          preProc = c("center", "scale", "spatialSign"),
          tuneGrid = nnetGrid, metric = "ROC",
          trace = FALSE, maxit = 2000, trControl = ctrl)
  )

  # Support Vector Machines (radial kernel); sigest() subsamples X, so it
  # sits after set.seed() for reproducibility. Element 1 is its low quantile.
  set.seed(seed_value)
  sigmaEst <- kernlab::sigest(as.matrix(X))
  svmGrid <- expand.grid(sigma = sigmaEst[1], C = 2^seq(-4, +4))
  svm <- roc_summary(
    train(X, y, method = "svmRadial", tuneGrid = svmGrid,
          preProc = c("center", "scale"), metric = "ROC",
          fit = FALSE, trControl = ctrl)
  )

  # K-Nearest Neighbors (distance-based, hence center/scale):
  set.seed(seed_value)
  knn <- roc_summary(
    train(X, y, method = "knn", tuneLength = 20,
          preProc = c("center", "scale"), metric = "ROC",
          trControl = ctrl)
  )

  # Naive Bayes:
  set.seed(seed_value)
  nb <- roc_summary(
    train(X, y, method = "nb", metric = "ROC", trControl = ctrl)
  )

  result <- list(nnet = nnet, svm = svm, knn = knn, nb = nb)
  if (build_mda_model) {
    result <- c(result, list(mda = mda))
  }
  result
}