#
# Written by:
# -- 
# John L. Weatherwax                2009-04-21
# 
# email: wax@alum.mit.edu
# 
# Please send comments and especially bug reports to the
# above email address.
#
#-----

# Fix the random seed before anything else runs so that repeated runs of
# this script draw the same random numbers.
#
set.seed(0)

# Load the helper code this script calls, in a fixed order: the decision tree
# learner from the Chapter 18 directory, then the majority function data
# generator and the perceptron learner from this directory.  source() with
# its default local=FALSE evaluates each file in the global environment.
#
invisible( lapply( c( "../Chapter18/decision_tree_learning.R",
                      "./majority_function_gen_data.R",
                      "./perceptron_learning.R" ),
                   source ) )

# Learning curve experiment on the majority function.
#
# For every training set size we run n_mc_trials Monte Carlo trials.  Each
# trial draws a fresh training set, fits a decision tree and a perceptron to
# it, draws a fresh test set of the same size, and records the fraction of
# test samples each model labels correctly.  The per-size means are stored in
# tree_testing_set_accuracy and perc_testing_set_accuracy.
#
dataset_dim = 11   # passed as d= to majority_function_gen_data
n_mc_trials = 10   # number of Monte Carlo trials averaged per training set size

training_set_sizes = seq( 10, 100, by=10 )

# One mean accuracy per training set size.  The result vectors are sized up
# front instead of being grown with c() on every pass through the loop.
#
n_sizes = length(training_set_sizes)
tree_testing_set_accuracy = numeric(n_sizes)
perc_testing_set_accuracy = numeric(n_sizes)

for( si in seq_len(n_sizes) ){
  tss = training_set_sizes[si]

  # Per-trial accuracies for this training set size:
  #
  mcs_tree = numeric(n_mc_trials)
  mcs_perc = numeric(n_mc_trials)
  for( mci in seq_len(n_mc_trials) ){
    if( mci!=1 ){
      # Progress report: running means over the trials completed so far.
      done = seq_len(mci-1)
      print( sprintf("MAJORITY: Train size %10d; mci= %5d; tree_accuracy= %5.3f; perc_accuracy= %5.3f", tss, mci, mean(mcs_tree[done]), mean(mcs_perc[done]) ) )
    }

    # Get some training data from the majority function:
    #
    D_train = majority_function_gen_data(tss,d=dataset_dim)
    X = D_train$X
    X_string = D_train$X_string
    y = D_train$y
    y_bool = D_train$y_bool

    # Learn a DECISION TREE with a training set of the given size.
    # Every column of X_string is declared as an attribute that can take the
    # values 'T' or 'F'; the majority label of the training set is the default.
    #
    attribute_names = colnames(X_string)
    attributes = setNames( rep( list(c('T','F')), length(attribute_names) ), attribute_names )
    default = majority_vote( y_bool )

    tree = decision_tree_learning( X_string, y_bool, attributes, default )

    # Learn a PERCEPTRON with a training set of the given size.
    # After the first trial the previous trial's result is handed back in as
    # W_0, so only the first trial for each size starts from the default.
    # NOTE(review): presumably a warm start of the weights -- this means the
    # perceptron trials for one size are not independent; confirm intended.
    #
    if( mci == 1 ){
      perceptron = perceptron_build_model( X, y )
    }else{
      perceptron = perceptron_build_model( X, y, W_0=perceptron )
    }

    # Generate new data to test with (same size as the training set, which
    # is why the correct counts below are divided by tss):
    #
    D_test = majority_function_gen_data(tss,d=dataset_dim)

    yhat = decision_tree_predict_multiple_samples( tree, D_test$X_string )
    mcs_tree[mci] = sum(D_test$y_bool == yhat)/tss

    # NOTE(review): the perceptron is trained on y but scored against y_bool;
    # assumes perceptron_predict returns labels comparable to y_bool -- confirm.
    yhat = perceptron_predict( perceptron, D_test$X )
    mcs_perc[mci] = sum(D_test$y_bool == yhat)/tss
  }
  tree_testing_set_accuracy[si] = mean(mcs_tree)
  perc_testing_set_accuracy[si] = mean(mcs_perc)
}

# Write the two learning curves (mean test set accuracy against training set
# size) to an EPS file: decision tree in red, perceptron in green.
#
postscript("../../WriteUp/Graphics/Chapter20/chap_20_dup_fig_22_majority_function.eps", onefile=FALSE, horizontal=FALSE)

# The first curve sets up the axes; ylim is fixed so both curves fit.
plot( training_set_sizes, tree_testing_set_accuracy, col='red', type='l', xlab='training set size', ylab='Proportion correct on test set', ylim=c(0.4,1.0) )
lines( training_set_sizes, perc_testing_set_accuracy, col='green' )

grid()

legend( 50, 1.0, c('decision tree test accuracy','perceptron test accuracy'), lty=1, col=c('red','green') )

# Close the device so the file is flushed to disk.
dev.off()