# # Written by: # -- # John L. Weatherwax 2009-04-21 # # email: wax@alum.mit.edu # # Please send comments and especially bug reports to the # above email address. # #----- treated = c( 121, 118, 110, 90 ) untreated = c( 95, 34, 22, 12 ) # We lump both data sets together and then randomly select four items to be the "treated" # (the others are the untreated): # data = c( treated, untreated ) # Test the different statistics suggested: # X_1 = sum( treated ) - sum( untreated ) X_2 = mean( treated ) - mean( untreated ) X_3 = sum( treated^2 ) X_4 = sum( rank( data )[1:4] ) # the sum of the ranks of the first stat_1 = function( treated_inds ){ sum( data[treated_inds] ) - sum( data[-treated_inds] ) } stat_2 = function( treated_inds ){ mean( data[treated_inds] ) - mean( data[-treated_inds] ) } stat_3 = function( treated_inds ){ sum( data[treated_inds]^2 ) } stat_4 = function( treated_inds ){ sum( rank(data)[treated_inds] ) } Stat_1_Values = combn( length(data), length(treated), stat_1 ) Stat_2_Values = combn( length(data), length(treated), stat_2 ) Stat_3_Values = combn( length(data), length(treated), stat_3 ) Stat_4_Values = combn( length(data), length(treated), stat_4 ) # Lets plot the histograms for each of these possible statistics: # #postscript("../../WriteUp/Graphics/Chapter3/ex_2_permutation_tests.eps", onefile=FALSE, horizontal=FALSE) par(mfrow=c(1, 4)) hist( Stat_1_Values, breaks=20, xlab="difference of sample sums", main="" ) abline( v=X_1, col='red' ) hist( Stat_2_Values, breaks=20, xlab="difference of sample means", main="" ) abline( v=X_2, col='red' ) hist( Stat_3_Values, breaks=20, xlab="sum of squares of the treated values", main="" ) abline( v=X_3, col='red' ) hist( Stat_4_Values, breaks=20, xlab="sum of ranks of the treated values", main="" ) abline( v=X_4, col='red' ) par(mfrow=c(1,1)) #dev.off() # What is the probability of getting the value obtained (or a larger one) for each of the statistics: # alpha = sum( Stat_1_Values >= X_1 ) / length( Stat_1_Values ) print( sprintf("S1: (prob. type I error): %10.6f", alpha) ) alpha = sum( Stat_2_Values >= X_2 ) / length( Stat_2_Values ) print( sprintf("S2: (prob. type I error): %10.6f", alpha) ) alpha = sum( Stat_3_Values >= X_3 ) / length( Stat_3_Values ) print( sprintf("S3: (prob. type I error): %10.6f", alpha) ) alpha = sum( Stat_4_Values >= X_4 ) / length( Stat_4_Values ) print( sprintf("S4: (prob. type I error): %10.6f", alpha) )