wilcoxon_rank_sum_test <- function(x_data, y_data, Delta_0 = 0, debugging = FALSE) {
  #
  # Computes the Wilcoxon rank-sum statistic for two samples together with the
  # mean and standard deviation used in its large-sample normal approximation.
  #
  # Arguments:
  #   x_data    = numeric vector holding the first sample
  #   y_data    = numeric vector holding the second sample
  #   Delta_0   = shift subtracted from the caller's x_data before ranking
  #               (default 0)
  #   debugging = if TRUE, intermediate values are printed
  #
  # Returns a list with elements:
  #   w       = sum of ranks of the m x's values (after any exchange, "x" is the
  #             shorter of the two samples and m is its length)
  #   mu_w    = mean used in normal approximation
  #   sigma_w = standard deviation used in normal approximation
  #
  # If x_data is longer than y_data the two samples are exchanged (a message is
  # printed), so w always refers to the shorter sample.  Delta_0 is always
  # applied to the sample the caller passed as x_data, regardless of exchange.
  #
  # See Page 609 from the book Probability and Statistics: For Engineering and
  # the Sciences by Jay L. Devore
  #
  # Note: this code does not compute or use the values of tau_i (the number of
  # samples that have tied values), as described on page 629 of the book.  Tied
  # values receive average ranks, but sigma_w is NOT corrected for ties.
  #
  # Written by:
  # --
  # John L. Weatherwax                2009-04-21
  #
  # email: wax@alum.mit.edu
  #
  # Please send comments and especially bug reports to the
  # above email address.
  #
  #-----

  # Shift the caller's x sample BEFORE any exchange; shifting afterwards would
  # move the caller's y sample instead and test the opposite shift.
  x_data <- x_data - Delta_0

  if (length(x_data) > length(y_data)) {
    print('Exchanging x_data and y_data since initially x_data is longer than y_data')
    tmp <- x_data
    x_data <- y_data
    y_data <- tmp
  }
  m <- length(x_data)
  n <- length(y_data)

  if (debugging) {
    print("x_data="); print(x_data)
    print("y_data="); print(y_data)
  }

  data <- c(x_data, y_data)

  # Missing values cannot be ranked meaningfully; fail with a clear message
  # rather than an obscure error from a later comparison.
  if (anyNA(data)) {
    stop("x_data and y_data must not contain missing values", call. = FALSE)
  }

  if (debugging) {
    data_orders <- order(data)
    print("data_orders="); print(data_orders)
    data_sorted <- data[data_orders]
    print("data_sorted= "); print(data_sorted)
  }

  # Check for duplicate measurements:
  if (anyDuplicated(data) > 0) {
    print('WARNING: Duplicate data values found')
  }

  data_rank <- rank(data, ties.method = "average")
  if (debugging) {
    print("data_rank= "); print(data_rank)
  }

  # The first m entries of the pooled vector are the x's.  seq_len(m) is used
  # because 1:m would yield c(1, 0) when m is 0.
  w <- sum(data_rank[seq_len(m)])

  # Compute the large sample normal approximation mean and standard deviation:
  mu_w <- m * (m + n + 1) / 2
  sigma_w <- sqrt(m * n * (m + n + 1) / 12)

  list(w = w, mu_w = mu_w, sigma_w = sigma_w)
}