library(caret)  # provides train, confusionMatrix, nearZeroVar, and findLinearCombos;
                # the glmnet and pamr packages are also needed for the penalized and NSC models

build_PCC_linear_models = function(X, y, seed_value=456){
  #
  # Builds (using caret's train function) several of the linear models discussed in
  # this chapter of the book, optimizing for the probability of correct classification,
  # PCC (i.e. accuracy).
  #
  # X [n samples, n features] needs to have zero-variance columns removed (use nearZeroVar
  # to find them) and linearly dependent columns removed (use findLinearCombos to find them).
  #
  # y [n samples, 1] is a two-level factor of class labels, with the FIRST level
  # corresponding to the event of interest.
  #
  # Written by:
  # --
  # John L. Weatherwax                2009-04-21
  #
  # email: wax@alum.mit.edu
  #
  # Please send comments and especially bug reports to the
  # above email address.
  #
  #-----

  # Linear Discriminant Analysis:
  #
  set.seed(seed_value)
  lda.classifier = train( X, y, method="lda", preProc=c("center","scale") )
  y_hat = predict( lda.classifier, X )
  cm = confusionMatrix( data=y_hat, reference=y )
  lda = list( classifier=lda.classifier, confusionMatrix=cm )

  # Penalized Methods (elastic-net models via glmnet):
  #
  glmnGrid = expand.grid( .alpha=c(0, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0), .lambda=seq( 0.01, 0.2, length=40 ) )
  set.seed(seed_value)
  glmnet.classifier = train( X, y, method="glmnet", tuneGrid=glmnGrid, preProc=c("center","scale") )
  y_hat = predict( glmnet.classifier, X )
  cm = confusionMatrix( data=y_hat, reference=y )
  glmnet = list( classifier=glmnet.classifier, confusionMatrix=cm )

  # Nearest Shrunken Centroids:
  #
  nscGrid = expand.grid( .threshold=0:25 )
  set.seed(seed_value)
  nsc.classifier = train( X, y, method="pam", tuneGrid=nscGrid, preProc=c("center","scale") )
  y_hat = predict( nsc.classifier, X )
  cm = confusionMatrix( data=y_hat, reference=y )
  nsc = list( classifier=nsc.classifier, confusionMatrix=cm )

  return( list( lda=lda, glmnet=glmnet, nsc=nsc ) )
}
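
# A minimal usage sketch (an addition, not part of the original routine).  The objects
# `predictors` (a numeric predictor matrix) and `classes` (a two-level factor) are
# hypothetical placeholders for a real data set; the preprocessing mirrors the
# requirements stated in the function header above.
#
# zv = nearZeroVar(predictors)                    # indices of (near) zero-variance columns
# if( length(zv) > 0 ){ predictors = predictors[, -zv] }
#
# lc = findLinearCombos(predictors)$remove        # indices of linearly dependent columns
# if( length(lc) > 0 ){ predictors = predictors[, -lc] }
#
# models = build_PCC_linear_models(predictors, classes)
#
# models$glmnet$classifier$bestTune               # chosen (alpha, lambda) for the penalized model
# models$nsc$confusionMatrix                      # apparent (resubstitution) confusion matrix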