#
# Written by:
# --
# John L. Weatherwax                2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
# EPage 464
#
# Purpose: for the churn training data, print (a) the largest correlations
# among the non-factor predictors, (b) filter-based and ReliefF importance
# scores for the factor predictors, and (c) the same two scores for the
# non-factor predictors.
#
#-----

# Provides largest_cors(), which is used below.
source('predictor_importance_utils.R')

library(caret)
library(AppliedPredictiveModeling)
library(C50) # needed for the churn dataset
library(CORElearn)

data(churn) # loads churnTrain & churnTest

# Find the categorical predictors:
#
# is.factor() is used instead of comparing class() to "factor" so that a
# column with more than one class (e.g. an ordered factor) is still detected
# and never produces a length > 1 condition.
factor_col_indices <- unname(which(vapply(churnTrain, is.factor, logical(1))))

# Echo each factor column name as it is found:
for (k in colnames(churnTrain)[factor_col_indices]) {
  print(k)
}

# What are the categorical features:
print(colnames(churnTrain)[factor_col_indices])

# The response (churn) is itself a factor column. Locate it by name rather
# than assuming it is the last factor column in the data frame.
response_col_index <- match("churn", colnames(churnTrain))
stopifnot(
  !is.na(response_col_index),
  response_col_index %in% factor_col_indices
)

# Factor columns that are predictors (i.e. every factor column except churn):
factor_predictor_indices <- setdiff(factor_col_indices, response_col_index)

# Compute the correlation between the real valued predictors:
#
LC <- largest_cors(churnTrain[, -factor_col_indices, drop = FALSE])
print(head(LC, 10))

# b:
#
# The importance of the categorical predictors in predicting churn
# (another categorical variable):
#
VI <- filterVarImp(
  x = churnTrain[, factor_predictor_indices, drop = FALSE],
  y = churnTrain$churn
)

# Sort by the value of the variable importance (the "yes" column):
var_order <- order(VI$yes, decreasing = TRUE)
VI <- as.matrix(VI[var_order, ])
print(VI[, "yes"])

# ReliefF scores for the categorical predictors. The data passed in holds all
# factor columns, i.e. the categorical predictors plus the churn response.
reliefalues <- attrEval(
  churn ~ .,
  data = churnTrain[, factor_col_indices, drop = FALSE],
  estimator = "ReliefFequalK",
  ReliefIterations = 50
)
print(sort(reliefalues, decreasing = TRUE))

# c:
#
# The continuous predictors:
#
VI <- filterVarImp(
  x = churnTrain[, -factor_col_indices, drop = FALSE],
  y = churnTrain$churn
)

# Sort by the value of the variable importance (the "yes" column):
#
var_order <- order(VI$yes, decreasing = TRUE)
VI <- as.matrix(VI[var_order, ])
print(VI[, "yes"])

# ReliefF scores for the continuous predictors. Only the factor *predictors*
# are dropped, so the churn response stays in the data for the formula.
reliefalues <- attrEval(
  churn ~ .,
  data = churnTrain[, -factor_predictor_indices, drop = FALSE],
  estimator = "ReliefFequalK",
  ReliefIterations = 50
)
print(sort(reliefalues, decreasing = TRUE))