#
# Written by:
# --
# John L. Weatherwax                2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
# EPage 787
#
#-----

naive_bayes_build_model = function( X, y ){
  #
  # We assume that in the dataframe X we have observed at least one feature value
  # from the set of possible feature values for that feature, e.g. if the list of
  # possible values for restaurant type is { Burger, French, Italian, Thai } then we
  # have to have at least one input vector in our training set with each one of these types.
  #
  # Note: we assume that the output y has only two states (TRUE or FALSE).
  #
  # We use Laplace (add-one) smoothing to estimate the probabilities in each table.
  #
  n_samples = dim(X)[1] # the number of samples
  n_features = dim(X)[2] # the number of features

  # Compute the a priori distribution of true/false:
  #
  P_true = sum(y) / n_samples
  P_false = 1 - P_true

  res = list()
  res[['prior']] = c(P_true,P_false)

  # For each feature, tabulate (estimate) the conditional probabilities P(x_i|C):
  #
  inds_True = y; n_True = sum(inds_True)
  inds_False = !y; n_False = sum(inds_False)
  for( fi in 1:n_features ){
    # P(x_i|Class=True):
    #
    feature_obs_when_True = X[inds_True,fi]
    numer = table( feature_obs_when_True ) + 1
    denom = n_True + nlevels(feature_obs_when_True)
    P_xi_given_C_T = numer / denom
    nm = paste( 'P_', colnames(X)[fi], '_given_C_T', sep='' )
    res[[nm]] = P_xi_given_C_T

    # P(x_i|Class=False):
    #
    feature_obs_when_False = X[inds_False,fi]
    numer = table( feature_obs_when_False ) + 1
    denom = n_False + nlevels(feature_obs_when_False)
    P_xi_given_C_F = numer / denom
    nm = paste( 'P_', colnames(X)[fi], '_given_C_F', sep='' )
    res[[nm]] = P_xi_given_C_F
  }

  res
}

naive_bayes_predict = function( model, X ){
  #
  # Make predictions using a Naive Bayes model (that we built earlier).
  #
  n_samples = dim(X)[1] # the number of samples
  n_features = dim(X)[2] # the number of features

  # Compute the product \prod_i P(f_i = x_{ij}|C) over each feature f_i:
  #
  for( fi in 1:n_features ){
    feat_name = colnames(X)[fi]

    nm = paste( 'P_', feat_name, '_given_C_T', sep='' )
    probs_True = model[[ nm ]]

    nm = paste( 'P_', feat_name, '_given_C_F', sep='' )
    probs_False = model[[ nm ]]

    if( fi == 1 ){
      y_hat_T = as.vector( probs_True[ X[[feat_name]] ] )
      y_hat_F = as.vector( probs_False[ X[[feat_name]] ] )
    }else{
      y_hat_T = y_hat_T * as.vector( probs_True[ X[[feat_name]] ] )
      y_hat_F = y_hat_F * as.vector( probs_False[ X[[feat_name]] ] )
    }
  } #endfor

  # Multiply in the prior:
  #
  y_hat_T = y_hat_T * model$prior[1]
  y_hat_F = y_hat_F * model$prior[2]

  # Normalize to get probabilities:
  #
  norm_factor = colSums( rbind( y_hat_T, y_hat_F ) )
  prob_T = y_hat_T / norm_factor
  prob_F = 1 - prob_T

  # Make predictions:
  #
  willwait = rep( TRUE, n_samples )
  willwait[ prob_F > prob_T ] = FALSE

  willwait
}
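
#
# Example usage (a minimal sketch, not part of the original script): the
# two-feature data frame below is a hypothetical stand-in for the AIMA
# restaurant data set. Each column must be a factor whose levels enumerate
# all possible feature values; note that indexing a probability table with
# a factor (as naive_bayes_predict does) uses the factor's integer codes,
# so training and test data must share the same level ordering.
#
X = data.frame( Patrons = factor( c('Some','Full','Some','None'), levels=c('None','Some','Full') ),
                Type    = factor( c('French','Thai','Burger','Italian'), levels=c('Burger','French','Italian','Thai') ) )
y = c( TRUE, FALSE, TRUE, FALSE )

model = naive_bayes_build_model( X, y )
print( model$prior )                      # the estimated class prior: c(P_true, P_false)
print( naive_bayes_predict( model, X ) )  # TRUE/FALSE predictions on the training inputs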