#' Compare two-class LDA with an optimal cut point against standard LDA
#'
#' Restricts the training and testing data to the two requested classes,
#' fits (a) the two-class LDA variant that uses an optimal cut point and
#' (b) a standard LDA model, and compares their error rates on the test set.
#'
#' See the section entitled "Linear Discriminant Analysis" in Chapter 4 of
#' the book ESLII, where this comparison is suggested.
#'
#' @param XTrain Matrix or data frame of training features (one row per
#'   sample).
#' @param labelsTrain Vector of training class labels, one per row of
#'   `XTrain`.
#' @param XTest Matrix or data frame of testing features (one row per sample).
#' @param labelsTest Vector of testing class labels, one per row of `XTest`.
#' @param indexClassOne Label of the first class of the pair to compare.
#' @param indexClassTwo Label of the second class of the pair to compare.
#'
#' @return `TRUE` when the test error rate of LDA with the optimal cut point
#'   is strictly lower than that of standard LDA, otherwise `FALSE`.
#'
#' @details
#' Side effects: prints one line with both test error rates. If the helper
#' `two_class_LDA_with_optimal_cut_point` is not already defined, it is loaded
#' with `source("two_class_LDA_with_optimal_cut_point.R")`.
#'
#' Written by:
#' --
#' John L. Weatherwax                2009-04-21
#'
#' email: wax@alum.mit.edu
#'
#' Please send comments and especially bug reports to the
#' above email address.
two_class_LDA_with_optimal_cut_point_pair_vowel <- function(XTrain, labelsTrain,
                                                            XTest, labelsTest,
                                                            indexClassOne,
                                                            indexClassTwo) {
  # Only read the helper file when the helper is not already available, so
  # repeated calls do not re-source it every time.
  if (!exists("two_class_LDA_with_optimal_cut_point", mode = "function")) {
    source("two_class_LDA_with_optimal_cut_point.R")
  }

  class_pair <- c(indexClassOne, indexClassTwo)

  # Keep only the samples that belong to one of the two requested classes.
  # `%in%` never yields NA and `drop = FALSE` keeps the 2-D shape even when a
  # single row or column remains.
  keep_train <- labelsTrain %in% class_pair
  XTrain <- XTrain[keep_train, , drop = FALSE]
  labelsTrain <- labelsTrain[keep_train]

  keep_test <- labelsTest %in% class_pair
  XTest <- XTest[keep_test, , drop = FALSE]
  labelsTest <- labelsTest[keep_test]

  class_values <- sort(unique(labelsTrain))
  if (length(class_values) != 2L) {
    stop("training data must contain samples from both requested classes",
         call. = FALSE)
  }
  if (length(labelsTest) == 0L) {
    stop("testing data contains no samples from the requested classes",
         call. = FALSE)
  }

  # Map the class labels to the integers 1 and 2 (smaller original label -> 1)
  # so that the LDA code below works correctly.
  labelsTrain <- ifelse(labelsTrain == class_values[1], 1, 2)
  labelsTest <- ifelse(labelsTest == class_values[1], 1, 2)

  #
  # LDA WITH THE OPTIMAL CUT POINT:
  #
  ocp_fit <- two_class_LDA_with_optimal_cut_point(XTrain, labelsTrain,
                                                  doPlots = FALSE)
  A <- ocp_fit[[1]]  # the expression such that x^T A is the discriminant
  cp <- ocp_fit[[2]] # the optimal cut point

  # Classify the testing set: everything is class 1 unless its discriminant
  # exceeds the cut point, in which case it is class 2.  The labels are
  # assigned directly (rather than via unique(labelsTrain), whose order
  # depends on which class happens to appear first in the training rows).
  discriminant <- as.vector(as.matrix(XTest) %*% A)
  predictedLabels <- ifelse(discriminant > cp, 2, 1)
  eRLDAOptTest <- mean(predictedLabels != labelsTest)

  #
  # STANDARD LDA MODEL:
  #
  ldam <- MASS::lda(XTrain, labelsTrain)

  # predict()$class is a factor with levels "1" and "2", so its integer codes
  # coincide with the remapped labels.
  tpLabels <- as.double(predict(ldam, XTest)$class)
  eRLDATest <- mean(tpLabels != labelsTest)

  print(sprintf("%20s: (%3d, %3d): eRateTest= %10.6f/%10.6f","LDA with OCP vs. LDA",indexClassOne,indexClassTwo,eRLDAOptTest,eRLDATest))

  # Report whether the new method is better or not.
  eRLDAOptTest < eRLDATest
}