#
# Implements ADABOOST with naive bayes learning
#
# Written by:
# --
# John L. Weatherwax                2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
# EPage 695
#
#-----

source('naive_bayes.R')

draw_samples_according_to_weights = function( X, y, w ){
  #
  # Draws N samples (with replacement) from the training set (X,y), with
  # sampling probabilities proportional to the weights w.
  #
  w = w / sum(w)
  N = length(y)
  inds = sample( seq(1,N), size=N, replace=TRUE, prob=w )
  return( list( X_samps=X[inds,], y_samps=y[inds] ) )
}

adaboost = function( X, y, M=50 ){
  #
  # This implements the ADABOOST code from the book (using naive bayes as the base learner).
  #
  # X: matrix with each row a case and each column a feature value (attribute value) for that case
  # y: a column vector with each row the TRUE/FALSE label for the corresponding row in X (can only be either TRUE/FALSE)
  #
  N = length(y)             # the number of data samples
  weights = rep( 1/N, N )   # initial weights to hold for each sample

  hs = list() # holds each hypothesis as we create it
  zs = list() # holds the hypothesis weights (votes), which depend on how well each hypothesis performs on the data

  # Begin adaboost iterations:
  #
  for( m in 1:M ){
    # Draw samples (x,y) from the training set according to the weights:
    #
    res = draw_samples_according_to_weights( X, y, weights )
    X_boost = res$X_samps
    y_boost = res$y_samps

    # Learn a model using this boosted data:
    #
    h = naive_bayes_build_model( X_boost, y_boost )
    hs[[m]] = h

    # Make predictions with this hypothesis:
    #
    h_prediction = naive_bayes_predict( h, X )

    # Accumulate the weighted error:
    #
    error = 0
    for( j in 1:N ){
      if( h_prediction[j] != y[j] ){
        error = error + weights[j]
      }
    }

    # Modify the weights (if we have a nonzero error): correctly classified
    # samples are downweighted by the factor error/(1-error):
    #
    for( j in 1:N ){
      if( h_prediction[j] == y[j] && error != 0 ){
        weights[j] = weights[j] * error / ( 1 - error )
      }
    }
    weights = weights / sum( weights )

    # Update the weight (vote) to use with this hypothesis.  Guard against a
    # zero training error, which would otherwise produce an infinite vote;
    # here we cap the vote as if a single sample had been misclassified:
    #
    if( error == 0 ){
      zs[[m]] = log( ( 1 - 1/N ) / ( 1/N ) )
    }else{
      zs[[m]] = log( ( 1-error )/error )
    }
  }

  return( list( hypothesis=hs, h_votes=zs ) )
}

adaboost_predict_single_sample = function( boosted_bayes, x ){
  #
  # Predicts the TRUE/FALSE label for a single sample x via a weighted
  # vote over the boosted hypotheses.
  #
  weights = boosted_bayes$h_votes
  hypothesis = boosted_bayes$hypothesis
  wgt_for_TRUE = 0
  wgt_for_FALSE = 0
  for( j in 1:length(weights) ){
    if( naive_bayes_predict( hypothesis[[j]], x )==TRUE ){
      wgt_for_TRUE = wgt_for_TRUE + weights[[j]]
    }else{
      wgt_for_FALSE = wgt_for_FALSE + weights[[j]]
    }
  }
  if( wgt_for_TRUE > wgt_for_FALSE ){
    return(TRUE)
  }else{
    return(FALSE)
  }
}

adaboost_predict_multiple_samples = function( boosted_bayes, X ){
  #
  # Predicts the TRUE/FALSE label for many samples using the boosted naive bayes learners.
  #
  n_samples = dim(X)[1]
  y = rep( FALSE, n_samples ) # some initial values
  for( si in 1:n_samples ){
    y[si] = adaboost_predict_single_sample( boosted_bayes, X[si,] )
  }
  return(y)
}
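
# A minimal usage sketch (an addition, not part of the original code).  It
# assumes naive_bayes.R supplies naive_bayes_build_model and
# naive_bayes_predict with the signatures used above, and it fabricates a
# small synthetic binary dataset purely for illustration.  The block is
# wrapped in if(FALSE) so that sourcing this file does not execute it.
#
if( FALSE ){
  set.seed(0)
  X_train = matrix( sample( c(0,1), 100*3, replace=TRUE ), nrow=100, ncol=3 )
  y_train = ( X_train[,1]==1 ) # a TRUE/FALSE label driven by the first feature

  bb = adaboost( X_train, y_train, M=25 )
  y_hat = adaboost_predict_multiple_samples( bb, X_train )
  print( mean( y_hat == y_train ) ) # training accuracy of the boosted ensemble
}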