#
# Written by:
# --
# John L. Weatherwax                2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
# Page 743
#
#-----

majority_function_gen_data = function( N, d=11 ){
  #
  # Generate N random samples from the majority function with d inputs
  # (d=11 by default). The majority function outputs a 1 when more than
  # half of its d features are true.
  #
  # To make this routine work well with the decision tree code the
  # values of the "d" features are also provided as the strings 'T' and 'F'.
  # Other versions of the code might need different representations
  # (i.e. Boolean or floating point) and thus we create each one here.
  #
  # Arguments:
  #   N : number of samples (rows) to generate
  #   d : number of binary features (columns) per sample
  #
  # Returns a list with the elements:
  #   X        : N x d data.frame of features coded as 0/1
  #   X_string : the same features coded as the strings 'F' (0) and 'T' (1)
  #   y        : the labels as doubles (1 = majority true, 0 = otherwise)
  #   y_bool   : the labels as logicals
  #

  # Each feature is an independent fair coin flip
  raw_data = sample( c(0,1), size=N*d, replace=TRUE, prob=c(0.5,0.5) )
  X = matrix( raw_data, nrow=N, ncol=d )
  X = data.frame( X )

  # Count the number of +1 vs. 0 in each row
  # (>d/2 means more +1s; <d/2 means more 0s)
  rs = rowSums(X)

  # The label is TRUE only when strictly more than half of the features are 1.
  # The comparison is strict, so with an even d a tied row is labeled FALSE.
  y_bool = rs > d/2
  y = as.double( y_bool )

  # String-coded copy of the features. Both masks are computed from the numeric
  # X (not from X_string), so the first replacement cannot affect the second.
  X_string = X
  X_string[ X==0 ] = 'F'
  X_string[ X==1 ] = 'T'

  list( X=X, X_string=X_string, y=y, y_bool=y_bool )
}