# # Duplicates the analysis on the two-way table 8.4: # The Effects of Sunlight and Fertilizer on Crop Yield on Page 139 of the book. # DF = data.frame( sunlight=as.factor( c( rep( 'Lo', 9 ), rep( 'Hi', 9 ) ) ), fertilizer=as.factor( rep( c( 'Lo', 'Med', 'Hi' ), 6 ) ), yield=c( 5, 15, 21, 10, 22, 29, 8, 18, 25, 6, 25, 55, 9, 32, 60, 12, 40, 48 ) ) # The classical ANOVA test (from R in Action EPage 234) # fit = aov( yield ~ sunlight*fertilizer, data=DF ) print( sprintf("Classical ANOV ") ) print(summary(fit)) # # To study the main effects of sunlight we will apply the permuation test to the two blocks of sunlight: # LO_SUN_MASK = DF$sunligh == 'Lo' S = sum( DF$yield[ LO_SUN_MASK ] ) # gives 153 like the book (should be a small value if Hi sunlight increase yield) set.seed(1234) B = 5000 # the sumber of bootstraps to run all_Ss = rep( NA, B ) # the bootstrap values of the S df_boot = DF for( bi in 1:B ){ df_boot$yield = sample(df_boot$yield) # permute across sunlight level and fertilizer level all_Ss[bi] = sum( df_boot$yield[ LO_SUN_MASK ] ) } plot( density( all_Ss ), xlab="statistic", main="" ) grid() abline(v=S, col='red') alpha = sum( all_Ss <= S ) / B print( sprintf("Permutation (B= %d) Sunlight main effects: prob. type I error: %10.6f", B, alpha) ) # # To study the main effects of fertilizer we will apply the permuation test to the levels of fertilizer: # compute_F = function( group_names, data ){ ugroups = unique( group_names ) ng = length(ugroups) x_bar_dot_dot = mean( data ) # the grand mean x_bar_i_dot = rep( NA, ng ) for( gi in 1:ng ){ d = data[ group_names == ugroups[gi] ] x_bar_i_dot[gi] = mean(d) } F = sum( abs( x_bar_i_dot - x_bar_dot_dot ) ) return(F) } F = compute_F( DF$fertilizer, DF$yield ) # the value of F under the original ordering set.seed(1234) B = 5000 # the sumber of bootstraps to run all_Fs = rep( NA, B ) # the bootstrap values of the F_1 for( bi in 1:B ){ all_Fs[bi] = compute_F( sample(DF$fertilizer), DF$yield ) # reorder the labels of the fertilizer } plot( density( all_Fs ), xlab="statistic", main="" ) grid() abline(v=F, col='red') alpha = sum( all_Fs >= F ) / B print( sprintf("Permutation (B= %d) Fertilizer main effects: prob. type I error: %10.6f", B, alpha) ) # # Test for an interaction term: # x_bar_dot_dot = mean( DF$yield ) # the grand mean group_I = unique( DF$sunlight ) I = length( group_I ) x_bar_i_dot_dot = rep( NA, I ) for( i in 1:I ){ inds = DF$sunlight == group_I[i] x_bar_i_dot_dot[i] = mean( DF$yield[ inds ] ) } group_J = unique( DF$fertilizer ) J = length( group_J ) x_bar_dot_j_dot = rep( NA, J ) for( j in 1:J ){ inds = DF$fertilizer == group_J[j] x_bar_dot_j_dot[j] = mean( DF$yield[ inds ] ) } # Compute the residuals X_ijk': # DF_residual = DF for( row in 1:dim(DF_residual)[1] ){ sun_i = which( DF_residual[row, 'sunlight'] == group_I ) fert_j = which( DF_residual[row, 'fertilizer'] == group_J ) DF_residual[row, 'yield'] = DF_residual[row, 'yield'] - x_bar_i_dot_dot[sun_i] - x_bar_dot_j_dot[fert_j] + x_bar_dot_dot } # Compute the test statistics "I" using the residual data above: # compute_I = function( DF, group_I, group_J ){ I_statistic = 0 for( i in 1:length(group_I) ){ for( j in 1:length(group_J) ){ inds = (DF$sunlight == group_I[i]) & (DF$fertilize == group_J[j] ) s = sum( DF$yield[inds] ) #print( sprintf( 'i= %d; j= %d; s= %f; s^2= %f', i, j, s, s^2 ) ) I_statistic = I_statistic + s^2 } } return(I_statistic) } I0 = compute_I( DF_residual, group_I, group_J ) # the original value of the statistic "I" set.seed(1234) B = 5000 # the sumber of bootstraps to run DF_boot = DF_residual all_Is = rep( NA, B ) # the bootstrap values of the F_1 for( bi in 1:B ){ DF_boot$yield = sample(DF_boot$yield) all_Is[bi] = compute_I( DF_boot, group_I, group_J ) } plot( density( all_Is ), xlab="statistic", main="" ) grid() abline(v=I0, col='red') alpha = sum( all_Is >= I0 ) / B print( sprintf("Permutation ( = %d) Interation effects: prob. type I error: %10.6f", B, alpha) )