#
# Written by:
# --
# John L. Weatherwax                2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
#-----
#
# Compares the test-set accuracy of a decision tree and a perceptron, both
# trained on data produced by majority_function_gen_data(), as a function of
# the training set size.  For every training set size several Monte Carlo
# trials are run and their accuracies averaged; the two averaged curves are
# then written to an EPS figure.
#
#-----

set.seed(0)

source('../Chapter18/decision_tree_learning.R')
source('./majority_function_gen_data.R')
source('./perceptron_learning.R')

dataset_dim <- 11
training_set_sizes <- seq(10, 100, by = 10)

# Number of Monte Carlo trials averaged for each training set size:
#
n_trials <- 10

# One averaged accuracy per training set size (preallocated rather than
# grown inside the loop):
#
tree_testing_set_accuracy <- numeric(length(training_set_sizes))
perc_testing_set_accuracy <- numeric(length(training_set_sizes))

for (size_index in seq_along(training_set_sizes)) {
  tss <- training_set_sizes[size_index]

  # Per-trial accuracies for this training set size:
  #
  mcs_tree <- numeric(n_trials)
  mcs_perc <- numeric(n_trials)

  for (mci in seq_len(n_trials)) {
    if (mci != 1) {
      # Progress report: running means over the trials completed so far.
      #
      completed <- seq_len(mci - 1)
      print(sprintf("MAJORITY: Train size %10d; mci= %5d; tree_accuracy= %5.3f; perc_accuracy= %5.3f",
                    tss, mci, mean(mcs_tree[completed]), mean(mcs_perc[completed])))
    }

    # Get some training data from the majority function:
    #
    D_train <- majority_function_gen_data(tss, d = dataset_dim)
    X <- D_train$X
    X_string <- D_train$X_string
    y <- D_train$y
    y_bool <- D_train$y_bool

    # Learn a DECISION TREE with a training set of the given size:
    #
    attributes <- list()  # every column of X_string may take the values 'T' or 'F'
    for (nm in colnames(X_string)) {
      attributes[[nm]] <- c('T', 'F')
    }
    default <- majority_vote(y_bool)
    tree <- decision_tree_learning(X_string, y_bool, attributes, default)

    # Learn a PERCEPTRON with a training set of the given size.  After the
    # first trial the previous trial's result is passed back in as W_0.
    #
    # NOTE(review): this presumably warm-starts the weights, in which case the
    # perceptron trials for a given size are not independent of each other
    # while the tree trials are -- confirm this is intended.
    #
    if (mci == 1) {
      perceptron <- perceptron_build_model(X, y)
    } else {
      perceptron <- perceptron_build_model(X, y, W_0 = perceptron)
    }

    # Generate new data to test with (the test set has the same size, tss, as
    # the training set, hence the division by tss below):
    #
    D_test <- majority_function_gen_data(tss, d = dataset_dim)

    yhat <- decision_tree_predict_multiple_samples(tree, D_test$X_string)
    mcs_tree[mci] <- sum(D_test$y_bool == yhat) / tss

    # NOTE(review): the perceptron is trained on `y` but scored against
    # `y_bool`; this assumes perceptron_predict() returns values comparable
    # with y_bool -- verify against perceptron_learning.R.
    #
    yhat <- perceptron_predict(perceptron, D_test$X)
    mcs_perc[mci] <- sum(D_test$y_bool == yhat) / tss
  }

  tree_testing_set_accuracy[size_index] <- mean(mcs_tree)
  perc_testing_set_accuracy[size_index] <- mean(mcs_perc)
}

# Plot both averaged accuracy curves into a single EPS file:
#
postscript("../../WriteUp/Graphics/Chapter20/chap_20_dup_fig_22_majority_function.eps",
           onefile = FALSE, horizontal = FALSE)
plot(training_set_sizes, tree_testing_set_accuracy, col = 'red', type = 'l',
     xlab = 'training set size', ylab = 'Proportion correct on test set',
     ylim = c(0.4, 1.0))
lines(training_set_sizes, perc_testing_set_accuracy, col = 'green', type = 'l')
grid()
legend(50, 1.0, c('decision tree test accuracy', 'perceptron test accuracy'),
       lty = 1, col = c('red', 'green'))
dev.off()