# One-way comparison of `data` between two locations ("city" vs "burbs"):
#   1. classical ANOVA F test,
#   2. permutation test of the location main effect,
#   3. bootstrap standard error / 2-sigma interval for a weighted mean.

# Observations: the first 10 are labelled "city", the last 5 "burbs".
data <- c(3, 4, 6, 2, 1, 1, 5, 7, 4, 3, 8, 7, 6, 9, 5)
location <- factor(rep(c("city", "burbs"), times = c(10, 5)))
DF <- data.frame(data = data, location = location)

# The classical ANOVA test (from R in Action EPage 234)
fit <- aov(data ~ location, data = DF)
sfit <- summary(fit)
print(sprintf(
  "Classical ANOVA: prob. type I error: %10.6f",
  sfit[[1]][["Pr(>F)"]][1]
))

# To study the main effects of location we apply a permutation test:
# the responses are shuffled against the fixed location labels and the
# statistic is recomputed for every shuffle.
#
# compute_F() comes from utils.R, which is not visible here; it is presumably
# the one-way F statistic for (groups, response) -- confirm against utils.R.
source("utils.R")

# Observed statistic. Deliberately NOT named `F`: that would mask R's
# built-in alias for FALSE for the rest of the session.
F_obs <- compute_F(DF$location, DF$data)

set.seed(1234)
B <- 100 # the number of permutations to run
all_Fs <- rep(NA_real_, B) # the permutation values of the statistic
for (bi in seq_len(B)) {
  all_Fs[bi] <- compute_F(DF$location, sample(DF$data))
}

plot(density(all_Fs), xlab = "statistic", main = "")
grid()
abline(v = F_obs, col = "red")

# Fraction of permuted statistics at least as large as the observed one.
alpha <- sum(all_Fs >= F_obs) / B
print(sprintf(
  "Main effects for location (B= %d): prob. type I error: %10.6f",
  B, alpha
))

# Weighted mean of the two location means and its bootstrap spread.
n <- nrow(DF)
city_idx <- which(DF$location == "city") # rows 1..10 for this data set
burbs_idx <- which(DF$location == "burbs") # rows 11..15 for this data set
w_city <- 2 / 3
w_burbs <- 1 / 3

m <- w_city * mean(DF$data[city_idx]) + w_burbs * mean(DF$data[burbs_idx])

B <- 100
all_Ms <- rep(NA_real_, B) # holds the bootstrap values of the weighted mean
for (bi in seq_len(B)) {
  # A loop-local name is used so the original `data` vector is not clobbered.
  # NOTE(review): the resample is drawn from the pooled data, ignoring
  # location, and is then split by position; a bootstrap stratified by
  # location may be what is intended -- confirm before changing.
  boot_sample <- sample(DF$data, n, replace = TRUE)
  all_Ms[bi] <- w_city * mean(boot_sample[city_idx]) +
    w_burbs * mean(boot_sample[burbs_idx])
}

# Point estimate +/- two bootstrap standard deviations.
ci <- m + 2 * sd(all_Ms) * c(-1, +1)
print(sprintf("Point estimate: %f; 2 sigma CI= (%f, %f)", m, ci[1], ci[2]))