#
# Written by:
# --
# John L. Weatherwax                2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
# EPage 704
#
# Note: this procedure is very slow computationally.  There are a great number
# of loops over samples in the training set.  If there is a better way to
# implement this please contact me.
#
#-----

# Builds the "model" used by table-lookup prediction.
#
# X: matrix (or data frame) with each row a case and each column a feature
#    value (attribute value) for that case
# y: a column vector with each row the TRUE/FALSE label for the corresponding
#    row in X (can only be either TRUE/FALSE)
#
# Returns a list with components:
#   data     - X, stored unchanged
#   response - y, stored unchanged
#   d        - the default label: TRUE only when TRUE labels strictly
#              outnumber FALSE labels in y (a tie gives FALSE)
#
table_lookup_learning <- function(X, y) {
  num_true <- sum(y)
  num_false <- length(y) - num_true

  # The default label is the majority class of the training labels.
  if (num_true > num_false) {
    d <- TRUE
  } else {
    d <- FALSE
  }

  list(data = X, response = y, d = d)
}

# Private helper: converts one sample into a character vector of its feature
# values.  Accepts a plain vector (what `X[i, ]` gives for a matrix), a 1-row
# matrix, or a 1-row data frame, so callers do not have to care whether
# dimensions were dropped.
#
.table_lookup_row_as_character <- function(row) {
  if (is.null(dim(row))) {
    as.character(row)
  } else {
    # as.matrix() turns a 1-row data frame into a matrix first, so factor
    # columns contribute their labels rather than their integer codes.
    as.character(as.matrix(row))
  }
}

# Predicts the TRUE/FALSE label for a single sample using table lookup.
#
# lookup_table: the list returned by table_lookup_learning
# x:            one sample, given as a vector, a 1-row matrix or a 1-row
#               data frame with one value per feature column of the
#               training data
#
# Returns:
#   - the default label lookup_table$d when no training row matches x exactly
#   - the label of the matching row when exactly one training row matches
#   - the most frequent label among the matching rows otherwise
#     (a tie gives FALSE)
#
# Feature values are compared after conversion to character strings.  A
# comparison that involves a missing value is never counted as a match.
#
table_lookup_predict_single_sample <- function(lookup_table, x) {
  x_str <- .table_lookup_row_as_character(x)

  train <- lookup_table$data
  if (length(x_str) != ncol(train)) {
    # Without this guard `==` would silently recycle the shorter vector.
    stop(
      "x has ", length(x_str), " feature value(s) but the lookup table has ",
      ncol(train), " column(s)",
      call. = FALSE
    )
  }

  # Find which samples in our training set exactly match the input sample x.
  # seq_len() keeps the loop empty for an empty table, and drop = FALSE keeps
  # each row two-dimensional even for single-column training data.
  is_match <- vapply(
    seq_len(nrow(train)),
    function(ii) {
      row_str <- .table_lookup_row_as_character(train[ii, , drop = FALSE])
      isTRUE(all(row_str == x_str))
    },
    logical(1)
  )
  matching_rows <- which(is_match)
  n_matching <- length(matching_rows)

  # Depending on how many samples matched we return:
  if (n_matching == 0) {
    # no samples matched: return the default
    return(lookup_table$d)
  }
  if (n_matching == 1) {
    # one sample matched: return its label
    return(lookup_table$response[matching_rows[1]])
  }

  # more than one sample matched: return the most frequent label
  num_true <- sum(lookup_table$response[matching_rows])
  num_false <- n_matching - num_true
  if (num_true > num_false) {
    TRUE
  } else {
    FALSE
  }
}

# Predicts the TRUE/FALSE label for many samples using table lookup.
#
# table_lookup: the list returned by table_lookup_learning
# X:            matrix (or data frame) with one sample per row and the same
#               feature columns as the training data
#
# Returns a vector with one predicted label per row of X (an empty logical
# vector when X has no rows).
#
# Note: I was not sure how to code the prediction algorithm in a vectorized
# way.  If anyone knows how to do such a thing please contact me.
#
table_lookup_predict_multiple_samples <- function(table_lookup, X) {
  n_samples <- nrow(X)
  y <- rep(FALSE, n_samples) # some initial values

  for (si in seq_len(n_samples)) {
    # drop = FALSE keeps the sample as a 1-row matrix / data frame.
    y[si] <- table_lookup_predict_single_sample(
      table_lookup,
      X[si, , drop = FALSE]
    )
  }

  y
}