#
# Written by:
# -- 
# John L. Weatherwax                2009-04-21
# 
# email: wax@alum.mit.edu
# 
# Please send comments and especially bug reports to the
# above email address.
#
#-----

treated = c( 121, 118, 110, 90 )
untreated = c( 95, 34, 22, 12 )

# We lump both data sets together and then randomly select four items to be the "treated"
# (the others are the untreated):
#
data = c( treated, untreated )

# Test the different statistics suggested:
#
X_1 = sum( treated ) - sum( untreated )
X_2 = mean( treated ) - mean( untreated )
X_3 = sum( treated^2 )
X_4 = sum( rank( data )[1:4] ) # the sum of the ranks of the first

stat_1 = function( treated_inds ){
    sum( data[treated_inds] ) - sum( data[-treated_inds] )
}

stat_2 = function( treated_inds ){
    mean( data[treated_inds] ) - mean( data[-treated_inds] )
}

stat_3 = function( treated_inds ){
    sum( data[treated_inds]^2 )
}

stat_4 = function( treated_inds ){
    sum( rank(data)[treated_inds] )
}

Stat_1_Values = combn( length(data), length(treated), stat_1 )
Stat_2_Values = combn( length(data), length(treated), stat_2 )
Stat_3_Values = combn( length(data), length(treated), stat_3 )
Stat_4_Values = combn( length(data), length(treated), stat_4 )

# Lets plot the histograms for each of these possible statistics:
#

#postscript("../../WriteUp/Graphics/Chapter3/ex_2_permutation_tests.eps", onefile=FALSE, horizontal=FALSE)
par(mfrow=c(1, 4))
hist( Stat_1_Values, breaks=20, xlab="difference of sample sums", main="" )
abline( v=X_1, col='red' )

hist( Stat_2_Values, breaks=20, xlab="difference of sample means", main="" )
abline( v=X_2, col='red' )

hist( Stat_3_Values, breaks=20, xlab="sum of squares of the treated values", main="" )
abline( v=X_3, col='red' )

hist( Stat_4_Values, breaks=20, xlab="sum of ranks of the treated values", main="" )
abline( v=X_4, col='red' )

par(mfrow=c(1,1))
#dev.off()

# What is the probability of getting the value obtained (or a larger one) for each of the statistics:
#
alpha = sum( Stat_1_Values >= X_1 ) / length( Stat_1_Values )
print( sprintf("S1: (prob. type I error): %10.6f", alpha) )

alpha = sum( Stat_2_Values >= X_2 ) / length( Stat_2_Values )
print( sprintf("S2: (prob. type I error): %10.6f", alpha) )

alpha = sum( Stat_3_Values >= X_3 ) / length( Stat_3_Values )
print( sprintf("S3: (prob. type I error): %10.6f", alpha) )

alpha = sum( Stat_4_Values >= X_4 ) / length( Stat_4_Values )
print( sprintf("S4: (prob. type I error): %10.6f", alpha) )