#
# Duplicates the analysis on the two-way table 8.4:
# The Effects of Sunlight and Fertilizer on Crop Yield on Page 139 of the book.
#
DF = data.frame(
    sunlight=as.factor( c( rep( 'Lo', 9 ), rep( 'Hi', 9 ) ) ),
    fertilizer=as.factor( rep( c( 'Lo', 'Med', 'Hi' ), 6 ) ),
    yield=c( 5, 15, 21, 10, 22, 29, 8, 18, 25, 6, 25, 55, 9, 32, 60, 12, 40, 48 )
)

# The classical ANOVA test (from R in Action EPage 234)
#
fit = aov( yield ~ sunlight*fertilizer, data=DF )
print( sprintf("Classical ANOV ") )
print(summary(fit))

#
# To study the main effects of sunlight we will apply the permuation test to the two blocks of sunlight:
#
LO_SUN_MASK = DF$sunligh == 'Lo'
S = sum( DF$yield[ LO_SUN_MASK ] ) # gives 153 like the book (should be a small value if Hi sunlight increase yield)

set.seed(1234)
B = 5000 # the sumber of bootstraps to run
all_Ss = rep( NA, B ) # the bootstrap values of the S
df_boot = DF
for( bi in 1:B ){
    df_boot$yield = sample(df_boot$yield) # permute across sunlight level and fertilizer level
    all_Ss[bi] = sum( df_boot$yield[ LO_SUN_MASK ] )
}

plot( density( all_Ss ), xlab="statistic", main="" )
grid()
abline(v=S, col='red')

alpha = sum( all_Ss <= S ) / B
print( sprintf("Permutation (B= %d) Sunlight main effects: prob. type I error: %10.6f", B, alpha) )

#
# To study the main effects of fertilizer we will apply the permuation test to the levels of fertilizer:
#
compute_F = function( group_names, data ){
    ugroups = unique( group_names )
    ng = length(ugroups)
    x_bar_dot_dot = mean( data ) # the grand mean
    x_bar_i_dot = rep( NA, ng )
    for( gi in 1:ng ){
        d = data[ group_names == ugroups[gi] ]
        x_bar_i_dot[gi] = mean(d)
    }
    F = sum( abs( x_bar_i_dot - x_bar_dot_dot ) )
    return(F)
}

F = compute_F( DF$fertilizer, DF$yield ) # the value of F under the original ordering

set.seed(1234)
B = 5000 # the sumber of bootstraps to run
all_Fs = rep( NA, B ) # the bootstrap values of the F_1
for( bi in 1:B ){
    all_Fs[bi] = compute_F( sample(DF$fertilizer), DF$yield ) # reorder the labels of the fertilizer
}

plot( density( all_Fs ), xlab="statistic", main="" )
grid()
abline(v=F, col='red')

alpha = sum( all_Fs >= F ) / B
print( sprintf("Permutation (B= %d) Fertilizer main effects: prob. type I error: %10.6f", B, alpha) )

#
# Test for an interaction term:
#
x_bar_dot_dot = mean( DF$yield ) # the grand mean

group_I = unique( DF$sunlight )
I = length( group_I )
x_bar_i_dot_dot = rep( NA, I )
for( i in 1:I ){ 
    inds = DF$sunlight == group_I[i]
    x_bar_i_dot_dot[i] = mean( DF$yield[ inds ] )
}

group_J = unique( DF$fertilizer )
J = length( group_J )
x_bar_dot_j_dot = rep( NA, J )
for( j in 1:J ){ 
    inds = DF$fertilizer == group_J[j]
    x_bar_dot_j_dot[j] = mean( DF$yield[ inds ] )
}

# Compute the residuals X_ijk':
#
DF_residual = DF
for( row in 1:dim(DF_residual)[1] ){
    sun_i = which( DF_residual[row, 'sunlight'] == group_I )
    fert_j = which( DF_residual[row, 'fertilizer'] == group_J )
    DF_residual[row, 'yield'] = DF_residual[row, 'yield'] - x_bar_i_dot_dot[sun_i] - x_bar_dot_j_dot[fert_j] + x_bar_dot_dot
}

# Compute the test statistics "I" using the residual data above:
#
compute_I = function( DF, group_I, group_J ){
    I_statistic = 0
    for( i in 1:length(group_I) ){
        for( j in 1:length(group_J) ){
            inds = (DF$sunlight == group_I[i]) & (DF$fertilize == group_J[j] )
            s = sum( DF$yield[inds] )
            #print( sprintf( 'i= %d; j= %d; s= %f; s^2= %f', i, j, s, s^2 ) )
            I_statistic = I_statistic + s^2
        }
    }
    return(I_statistic)
}

I0 = compute_I( DF_residual, group_I, group_J ) # the original value of the statistic "I"

set.seed(1234)
B = 5000 # the sumber of bootstraps to run
DF_boot = DF_residual
all_Is = rep( NA, B ) # the bootstrap values of the F_1
for( bi in 1:B ){ 
    DF_boot$yield = sample(DF_boot$yield)
    all_Is[bi] = compute_I( DF_boot, group_I, group_J )
}

plot( density( all_Is ), xlab="statistic", main="" )
grid()
abline(v=I0, col='red')

alpha = sum( all_Is >= I0 ) / B
print( sprintf("Permutation ( = %d) Interation effects: prob. type I error: %10.6f", B, alpha) )