#
# Implements ADABOOST with decision trees
#
# Written by:
# --
# John L. Weatherwax 2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
# EPage 695
#
#-----

source('decision_tree_learning.R')

draw_samples_according_to_weights = function( X, y, w ){
  #
  # Draws a bootstrap sample of (X, y) where each row is selected with
  # probability proportional to its weight w.
  #
  w = w / sum(w)
  N = length(y)
  inds = sample( seq(1,N), size=N, replace=TRUE, prob=w )
  return( list( X_samps=X[inds,], y_samps=y[inds] ) )
}

adaboost = function( X, y, M=50, max_depth=1 ){
  #
  # This implements the ADABOOST code from the book (using decision trees as the base learner).
  #
  # X: matrix with each row a case and each column a feature value (attribute value) for that case
  # y: a column vector with each row the TRUE/FALSE label for the corresponding row in X (can only be either TRUE/FALSE)
  #
  N = length(y)            # the number of data samples
  weights = rep( 1/N, N )  # initial weights to hold for each sample

  hs = list()  # holds each hypothesis as we create it
  zs = list()  # holds the hypothesis weights, which depend on how well each hypothesis performs on the data

  # For decision_tree_learning we need some initial things:
  #
  # Create the initial attributes variable (an R list with items and keys as above):
  # Note our training set must have seen all possible values for each attribute.
  #
  attributes = list()
  for( cn in colnames(X) ){
    attributes[[cn]] = sort(as.matrix(unique(X[cn])))
  }

  # Compute the default classification:
  #
  default = majority_vote(y)

  # Begin adaboost iterations:
  #
  for( m in 1:M ){

    # Draw samples (x,y) from the training set according to the weights:
    #
    res = draw_samples_according_to_weights( X, y, weights )
    X_boost = res$X_samps
    y_boost = res$y_samps

    # Learn a model using this boosted data:
    #
    h = decision_tree_learning( X_boost, y_boost, attributes, default, max_depth=max_depth )
    hs[[m]] = h

    # Make predictions with this hypothesis:
    #
    h_prediction = rep( TRUE, N )
    for( j in 1:N ){
      h_prediction[j] = decision_tree_predict_single_sample( h, X[j,] )
    }

    # Accumulate the weighted error:
    #
    error = 0
    for( j in 1:N ){
      if( h_prediction[j] != y[j] ){
        error = error + weights[j]
      }
    }

    # Downweight the correctly classified samples (if we have a nonzero error):
    #
    for( j in 1:N ){
      if( h_prediction[j] == y[j] && error != 0 ){
        weights[j] = weights[j] * error / ( 1 - error )
      }
    }
    weights = weights / sum( weights )

    # Update the weight (vote) to use with this hypothesis:
    #
    zs[[m]] = log( ( 1 - error ) / error )
  }

  return( list( trees=hs, tree_votes=zs ) )
}

adaboost_predict_single_sample = function( boosted_trees, x ){
  #
  # Predicts the TRUE/FALSE label for a single sample by taking a weighted
  # vote over all of the boosted trees.
  #
  weights = boosted_trees$tree_votes
  hypothesis = boosted_trees$trees

  wgt_for_TRUE = 0
  wgt_for_FALSE = 0
  for( j in 1:length(weights) ){
    if( decision_tree_predict_single_sample( hypothesis[[j]], x )==TRUE ){
      wgt_for_TRUE = wgt_for_TRUE + weights[[j]]
    }else{
      wgt_for_FALSE = wgt_for_FALSE + weights[[j]]
    }
  }
  if( wgt_for_TRUE > wgt_for_FALSE ){
    return(TRUE)
  }else{
    return(FALSE)
  }
}

adaboost_predict_multiple_samples = function( boosted_trees, X ){
  #
  # Predicts the TRUE/FALSE label for many samples using the boosted trees.
  # Note: I was not sure how to code the prediction algorithm in a vectorized way.
  # If anyone knows how to do such a thing please contact me.
  #
  n_samples = dim(X)[1]
  y = rep( FALSE, n_samples )  # some initial values
  for( si in 1:n_samples ){
    y[si] = adaboost_predict_single_sample( boosted_trees, X[si,] )
  }
  return(y)
}
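
#
# A minimal usage sketch (not part of the original script): it assumes that
# 'decision_tree_learning.R' supplies majority_vote, decision_tree_learning,
# and decision_tree_predict_single_sample as used above, and that X is a
# data frame of categorical attributes (the indexing X[cn] and X[j,] above
# suggests this) with TRUE/FALSE labels y.  The "Patrons"/"Hungry" attribute
# names and the toy data below are purely hypothetical.  The block is guarded
# with if(FALSE) so that sourcing this file does not run it.
#
if( FALSE ){
  X = data.frame( Patrons=c("Some","Full","None","Full","Some","Full"),
                  Hungry =c("Yes","Yes","No","Yes","No","No"),
                  stringsAsFactors=FALSE )
  y = c(TRUE, FALSE, FALSE, TRUE, TRUE, FALSE)

  bt    = adaboost( X, y, M=25, max_depth=1 )          # fit 25 boosted stumps
  y_hat = adaboost_predict_multiple_samples( bt, X )   # in-sample predictions
  print( mean( y_hat == y ) )                          # training accuracy
}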