#
# Written by:
# --
# John L. Weatherwax                2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
# EPage 352
#
# Fits several nonlinear classifiers to the oil data (fattyAcids / oilType)
# via build_PCC_nonlinear_models() and prints their accuracies, sorted from
# lowest to highest, followed by the SVM confusion matrix.
#
#-----

# Flag kept from the original script; it is not read anywhere below.
# NOTE(review): the sourced helper file may consult it -- confirm before removing.
save_plots <- FALSE

library(caret) # needed for the oil dataset
library(AppliedPredictiveModeling)

# Provides build_PCC_nonlinear_models()
source('build_PCC_nonlinear_models.R')

# Loads `fattyAcids` (predictors) and `oilType` (class labels)
data(oil)

# Part (a):
#
# Identify near-zero-variance predictors and report how many were flagged.
zv_cols <- nearZeroVar(fattyAcids)
n_predictors <- ncol(fattyAcids)
print(sprintf(
  "Dropping %d zero variance columns from %d (fraction=%10.6f)",
  length(zv_cols), n_predictors, length(zv_cols) / n_predictors
))

# Actually remove the flagged columns so the message above is truthful.
# The length guard matters: negative indexing with an empty index vector
# would select zero columns rather than all of them.
if (length(zv_cols) > 0) {
  X <- fattyAcids[, -zv_cols, drop = FALSE]
} else {
  X <- fattyAcids
}

# Print any linear dependencies among the remaining columns
# (the original author observed none for this data set).
print(findLinearCombos(X))

# Build nonlinear models with this data:
#
nonlinear_models <- build_PCC_nonlinear_models(X, oilType)

# Present the sampled accuracy estimates for each model:
#
# The first entry of each confusion matrix's `overall` vector is used as the
# accuracy; unname() strips the element names so they do not become row names.
model_accuracy <- unname(c(
  nonlinear_models$mda$confusionMatrix$overall[1],
  nonlinear_models$nnet$confusionMatrix$overall[1],
  nonlinear_models$svm$confusionMatrix$overall[1],
  nonlinear_models$knn$confusionMatrix$overall[1],
  nonlinear_models$nb$confusionMatrix$overall[1]
))
df <- data.frame(
  name = c("MDA", "NNET", "SVM", "KNN", "NB"),
  Accuracy = model_accuracy
)
rownames(df) <- NULL

# Order our dataframe by performance (ascending accuracy):
#
df <- df[order(df$Accuracy), ]
print("ACCURACY Performance on the oil dataset")
print(df)

# For the SVM model ... where is it making its errors:
#
print(nonlinear_models$svm$confusionMatrix)