#
# Written by:
# -- 
# John L. Weatherwax                2009-04-21
# 
# email: wax@alum.mit.edu
# 
# Please send comments and especially bug reports to the
# above email address.
#
# EPage 704
#
# Note: this procedure is very slow computationally.  There are a great number of loops over samples in the
# training set.  If there is a better way to implement this please contact me.
# 
#-----

table_lookup_learning <- function(X, y) {
  #
  # "Trains" a table-lookup classifier.  Training only stores the examples
  # and works out which label to fall back on.
  #
  # X: one row per training case, one column per feature (attribute) value
  # y: TRUE/FALSE label for each row of X
  #
  # Returns a list with elements:
  #   data     - X, unchanged
  #   response - y, unchanged
  #   d        - default label: TRUE only when TRUE labels strictly outnumber
  #              FALSE labels in y (a tie gives FALSE)
  #
  #-----

  n_cases    <- length(y)
  n_positive <- sum(y)
  n_negative <- n_cases - n_positive

  default_label <- if (n_positive > n_negative) TRUE else FALSE

  list(data = X, response = y, d = default_label)
}


table_lookup_predict_single_sample <- function(lookup_table, x) {
  #
  # Predicts the TRUE/FALSE label for a single sample using table lookup.
  #
  # lookup_table: list with elements data (training cases, one per row),
  #               response (label of each training case) and d (default label),
  #               i.e. the structure returned by table_lookup_learning
  # x: the sample to classify; a plain vector of feature values, a one-row
  #    matrix or a one-row data.frame
  #
  # Returns:
  #   - lookup_table$d when no training row matches x
  #   - the stored label when exactly one training row matches
  #   - TRUE/FALSE by majority vote over the matching rows otherwise
  #     (a tie gives FALSE)
  #
  # Rows are compared feature-by-feature on their character representation.
  # A comparison that involves a missing value is treated as "no match".
  #

  # Convert one sample to a character vector with one entry per feature.
  # A dimensionless vector (what `m[i, ]` yields for a matrix) is first turned
  # into a one-row matrix because apply() requires its input to have dims.
  as_feature_strings <- function(row) {
    if (is.null(dim(row))) {
      row <- matrix(row, nrow = 1)
    }
    apply(row, 2, as.character)
  }

  train <- lookup_table$data
  x_str <- as_feature_strings(x)

  # Find which samples in our training set exactly match the input sample x.
  # seq_len() makes the loop a no-op for an empty training table, and
  # drop = FALSE keeps each training row two-dimensional.
  n_train  <- nrow(train)
  is_match <- logical(n_train)
  for (ii in seq_len(n_train)) {
    v_str <- as_feature_strings(train[ii, , drop = FALSE])
    # isTRUE() maps an NA comparison result to FALSE instead of crashing `if`
    is_match[ii] <- isTRUE(all(v_str == x_str))
  }
  matching_rows <- which(is_match)

  # No sample matched: return the default label
  if (length(matching_rows) == 0) {
    return(lookup_table$d)
  }

  # One sample matched: return its label as stored
  if (length(matching_rows) == 1) {
    return(lookup_table$response[matching_rows[1]])
  }

  # More than one sample matched: return the most frequent label
  num_true  <- sum(lookup_table$response[matching_rows])
  num_false <- length(matching_rows) - num_true
  num_true > num_false
}


table_lookup_predict_multiple_samples <- function(table_lookup, X) {
  #
  # Predicts the TRUE/FALSE label for many samples using table lookup.
  #
  # table_lookup: the lookup table (list with data, response and d) that is
  #               handed unchanged to table_lookup_predict_single_sample
  # X: the samples to classify, one per row (matrix or data.frame)
  #
  # Returns a vector with one prediction per row of X (empty when X has no
  # rows).
  #
  # Note: each sample is classified independently by
  # table_lookup_predict_single_sample, so the cost grows with
  # (rows of X) * (rows of the training table).
  #
  n_samples <- dim(X)[1]
  y <- rep(FALSE, n_samples) # some initial values

  # seq_len() yields an empty sequence when X has zero rows, whereas
  # 1:n_samples would iterate over c(1, 0).
  for (si in seq_len(n_samples)) {
    # drop = FALSE keeps the row two-dimensional; the single-sample predictor
    # calls apply(x, 2, ...) on it, which needs an object with dims.
    y[si] <- table_lookup_predict_single_sample(
      table_lookup, X[si, , drop = FALSE]
    )
  }
  return(y)
}