#
# R code to load in the vowel data set from the book ESLII
#
# Output:
#
#   res: list of data frames XT
#
# Written by:
# --
# John L. Weatherwax 2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
#-----

library(MASS)  # provides lda() and qda()
library(nnet)  # provides multinom() (multinomial logistic regression)

source("load_vowel_data.R")
source("linear_regression_indicator_matrix.R")

# Misclassification (error) rate of predicted labels against true labels.
#
# Both arguments must use the same numeric class coding (here 1..K);
# comparison is elementwise equality, so the result is
# (# incorrect) / (# total).
classification_error_rate <- function(predicted, truth) {
  mean(predicted != truth)
}

out <- load_vowel_data(FALSE)
XTrain      <- out[[1]]
labelsTrain <- out[[2]]
XTest       <- out[[3]]
labelsTest  <- out[[4]]

# plot( X[,1], X[,2] )

#
# TRAIN A LINEAR REGRESSION BASED MODEL:
#
out <- linear_regression_indicator_matrix(XTrain, labelsTrain)
Bhat     <- out[[1]]  # fitted coefficient matrix (intercept row first)
Yhat     <- out[[2]]  # fitted indicator responses on the training data
tpLabels <- out[[3]]  # predicted training labels
eRateTrain <- classification_error_rate(tpLabels, labelsTrain)

# Predict on the testing data with this classifier: prepend an intercept
# column of ones, then classify each row by the arg-max discriminant.
N  <- length(labelsTest)
Xm <- as.matrix(cbind(rep(1.0, N), XTest))
tpLabels  <- apply(Xm %*% Bhat, 1, which.max)
eRateTest <- classification_error_rate(tpLabels, labelsTest)

print(sprintf("%40s: %10.6f; %10.6f", "Linear Regression", eRateTrain, eRateTest))

#
# TRAIN A LDA MODEL:
#
ldam <- lda(XTrain, labelsTrain)

# predict()$class is a factor; as.double() recovers its level codes,
# which are compared numerically against the label vectors.
# NOTE(review): this assumes the factor levels sort into the same 1..K
# order as the integer labels from load_vowel_data — confirm there.
eRateTrain <- classification_error_rate(
  as.double(predict(ldam, XTrain)$class), labelsTrain
)
eRateTest <- classification_error_rate(
  as.double(predict(ldam, XTest)$class), labelsTest
)

print(sprintf("%40s: %10.6f; %10.6f", "Linear Discriminant Analysis (LDA)", eRateTrain, eRateTest))

#
# TRAIN A QDA MODEL:
#
qdam <- qda(XTrain, labelsTrain)

eRateTrain <- classification_error_rate(
  as.double(predict(qdam, XTrain)$class), labelsTrain
)
eRateTest <- classification_error_rate(
  as.double(predict(qdam, XTest)$class), labelsTest
)

print(sprintf("%40s: %10.6f; %10.6f", "Quadratic Discriminant Analysis (QDA)", eRateTrain, eRateTest))

#
# TRAIN A LOGISTIC REGRESSION MODEL:
#
# Based on code found here:
#
#   http://www.ats.ucla.edu/stat/r/dae/mlogit.htm
#
fm <- data.frame(cbind(XTrain, labelsTrain))
# The x.1 ... x.10 column names come from load_vowel_data's feature frame.
m <- multinom(
  labelsTrain ~ x.1 + x.2 + x.3 + x.4 + x.5 + x.6 + x.7 + x.8 + x.9 + x.10,
  data = fm
)
summary(m)

yhat_train <- predict(m, newdata = XTrain, "class")
yhat_test  <- predict(m, newdata = XTest, "class")

eRateTrain <- classification_error_rate(as.integer(yhat_train), labelsTrain)
eRateTest  <- classification_error_rate(as.integer(yhat_test), labelsTest)

print(sprintf("%40s: %10.6f; %10.6f", "Logistic Regression", eRateTrain, eRateTest))