# # Written by: # -- # John L. Weatherwax 2009-04-21 # # email: wax@alum.mit.edu # # Please send comments and especially bug reports to the # above email address. # #----- DF = read.csv("../../Data/chap_1_prob_11.csv") groups = unique( DF$Year ) sum_of_values_in_group = c() for( g in groups ){ s = sum( DF[ DF$Year == g, ]$MPG ) sum_of_values_in_group = c( sum_of_values_in_group, s ) } g_i_fn = c( 1, 2, 3 ) # The statistics for the original labelling S = sum( g_i_fn * sum_of_values_in_group ) year_N_count = table( DF$Year ) print( year_N_count ) one_permutation_sample_inds = function( n, counts ){ # # Get the indices of a bootstrap sample of the same size as the ones given # remaining_labels = 1:n inds_1 = sample( remaining_labels, counts[1] ) remaining_labels = setdiff( remaining_labels, inds_1 ) inds_2 = sample( remaining_labels, counts[2] ) inds_3 = setdiff( remaining_labels, inds_2 ) return( list(inds_1=inds_1, inds_2=inds_2, inds_3=inds_3) ) } # Take the bootstrap samples (derive different labels of the data): # set.seed(1234) data = DF$MPG n_points = length(data) B = 10000 null_stats = rep( NA, B ) for( bi in 1:B ){ inds = one_permutation_sample_inds( n_points, year_N_count ) group_sums = c( sum( data[ inds$inds_1 ] ), sum( data[ inds$inds_2 ]), sum( data[ inds$inds_3 ] ) ) null_stats[bi] = sum( group_sums * g_i_fn ) } #postscript("../../WriteUp/Graphics/Chapter3/ex_3_null_stat_histogram.eps", onefile=FALSE, horizontal=FALSE) plot( density( null_stats ), xlab="statistic", main="" ) grid() abline(v=S, col='red') #dev.off() alpha = sum( null_stats <= S ) / B print( sprintf("prob. type I error: %10.6f", alpha) )