#
# Written by:
# --
# John L. Weatherwax                2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
# EPage 311
#
# Fits several linear classification models (LDA, GLMNET, NSC) to the oil
# dataset and reports the accuracy of each one.
#
#-----

save_plots <- FALSE

library(caret) # needed for the oil dataset
library(AppliedPredictiveModeling)
library(pROC)

# Provides build_PCC_linear_models(), which is used below.
source('build_PCC_linear_models.R')

data(oil)

# Class counts, followed by class proportions.
oil_type_counts <- table(oilType)
oil_type_counts
oil_type_counts / sum(oil_type_counts)

# Part (c):
#
zv_cols <- nearZeroVar(fattyAcids)
print(sprintf(
  "Dropping %d zero variance columns from %d (fraction=%10.6f)",
  length(zv_cols), ncol(fattyAcids), length(zv_cols) / ncol(fattyAcids)
))

# Actually remove the flagged columns so the message above is truthful.
# The guard matters: indexing with an empty negative vector, i.e.
# fattyAcids[, -integer(0)], would select *no* columns at all.
X <- fattyAcids
if (length(zv_cols) > 0) {
  X <- X[, -zv_cols, drop = FALSE]
}

# Report any linearly dependent columns among the remaining predictors
# (none are expected for this dataset).
print(findLinearCombos(X))

# Build linear models with this data:
#
linear_models <- build_PCC_linear_models(X, oilType)

# Present the sampled accuracy estimates for each model:
#
df <- rbind(
  data.frame(name = "LDA",
             Accuracy = linear_models$lda$confusionMatrix$overall[1]),
  data.frame(name = "GLMNET",
             Accuracy = linear_models$glmnet$confusionMatrix$overall[1]),
  data.frame(name = "NSC",
             Accuracy = linear_models$nsc$confusionMatrix$overall[1])
)
# Each one-row frame inherits a row name from the named element extracted
# above; reset them to plain 1..n indices.
rownames(df) <- NULL

# Order our dataframe by performance (ascending accuracy):
#
df <- df[order(df$Accuracy), ]
print("ACCURACY Performance on the oil dataset")
print(df)

# For the NSC model ... where is it making its errors:
#
print(linear_models$nsc$confusionMatrix)