#
# Written by:
# -- 
# John L. Weatherwax                2009-04-21
# 
# email: wax@alum.mit.edu
# 
# Please send comments and especially bug reports to the
# above email address.
#
#-----

DF = read.csv("../../Data/chap_1_prob_11.csv")

groups = unique( DF$Year )

sum_of_values_in_group = c()
for( g in groups ){
    s = sum( DF[ DF$Year == g, ]$MPG )
    sum_of_values_in_group = c( sum_of_values_in_group, s )
}

g_i_fn = c( 1, 2, 3 )

# The statistics for the original labelling
S = sum( g_i_fn * sum_of_values_in_group )

year_N_count = table( DF$Year )
print( year_N_count )

one_permutation_sample_inds = function( n, counts ){
  #
  # Get the indices of a bootstrap sample of the same size as the ones given
  #
  remaining_labels = 1:n

  inds_1 = sample( remaining_labels, counts[1] )
  
  remaining_labels = setdiff( remaining_labels, inds_1 )

  inds_2 = sample( remaining_labels, counts[2] )

  inds_3 = setdiff( remaining_labels, inds_2 )

  return( list(inds_1=inds_1, inds_2=inds_2, inds_3=inds_3) )
}

# Take the bootstrap samples (derive different labels of the data):
#
set.seed(1234)

data = DF$MPG
n_points = length(data)

B = 10000
null_stats = rep( NA, B )
for( bi in 1:B ){
  inds = one_permutation_sample_inds( n_points, year_N_count )
  group_sums = c( sum( data[ inds$inds_1 ] ), sum( data[ inds$inds_2 ]), sum( data[ inds$inds_3 ] ) )
  null_stats[bi] = sum( group_sums * g_i_fn )
}
#postscript("../../WriteUp/Graphics/Chapter3/ex_3_null_stat_histogram.eps", onefile=FALSE, horizontal=FALSE)
plot( density( null_stats ), xlab="statistic", main="" )
grid()
abline(v=S, col='red')
#dev.off()

alpha = sum( null_stats <= S ) / B
print( sprintf("prob. type I error: %10.6f", alpha) )