#
# Written by:
# -- 
# John L. Weatherwax                2009-04-21
# 
# email: wax@alum.mit.edu
# 
# Please send comments and especially bug reports to the
# above email address.
#
# rm(list = setdiff(ls(), lsf.str())) # removes all variables (not functions)
#
#-----

source('utils.R')

# Ex 11.16
# 
I = 3
J = 4
K = 3
SSA = 30763.0
SSB = 34185.6
SSE = 97436.8
SST = 205966.6
SSAB = SST - SSA - SSB - SSE

MST = SST / (I*J*K-1)
MSA = SSA / (I-1)
MSB = SSB / (J-1)
MSE = SSE / ( I*J*(K-1) )
MSAB = SSAB / ( (I-1)*(J-1) )

f_A = MSA/MSE
f_A_crit = qf( 1 - 0.05, I-1, I*J*(K-1) )
1 - pf( f_A, I-1, I*J*(K-1) )

f_B = MSB / MSE
f_B_crit = qf( 1 - 0.05, J-1, I*J*(K-1) )
1 - pf( f_B, J-1, I*J*(K-1) )

f_AB = MSAB / MSE
f_AB_crit = qf( 1 - 0.05, (I-1)*(J-1), I*J*(K-1) )
1 - pf( f_AB, (I-1)*(J-1), I*J*(K-1) )

print( c( f_A, f_B, f_AB ) )

x_i_dot_dot = c( 4010.88, 4029.10, 3960.02 )

q_crit = qtukey( 1-0.05, I, I*J*(K-1) )
ss = sqrt( MSE/(J*K) )
ws = q_crit * ss

print( diff(x_i_dot_dot) )


# Ex 11.17
#
DF = read.csv( "../../Data/CH11/ex11-17.txt", header=TRUE, quote="'" )
colnames(DF) = c("Sand.Addition", "Carbon.Fiber.Addition", "Casting.Hardness", "Wet.Mold.Strength")
DF$Sand.Addition = as.factor( DF$Sand.Addition )
DF$Carbon.Fiber.Addition = as.factor( DF$Carbon.Fiber.Addition )

a = aov( Wet.Mold.Strength ~ Sand.Addition*Carbon.Fiber.Addition, data=DF )
summary(a)

a = aov( Casting.Hardness ~ Sand.Addition*Carbon.Fiber.Addition, data=DF )
summary(a)

# Plot the mean hardness for different levels of carbon fiber (there are several ways to do this):
#
interaction.plot( DF$Sand.Addition, DF$Carbon.Fiber.Addition, DF$Casting.Hardness, 
                  type="b", col=c("red", "darkgreen"),
                  pch=c(16, 18),
                  main="Interaction between Sand.Addition and Carbon.Fiber.Addition")

if( FALSE ){
    # I could not get this function to work in the time I had to work on this exercise.
    # If anyone can get this plot working please contact me.
    #
    library(gplots)
    plotmeans( Casting.Hardness ~ Sand.Addition * Carbon.Fiber.Addition,
              data=DF,
              col=c("red", "darkgreen"),
              main="test",
              xlab="test2" )
}


# Ex 11.18
#
DF = read.csv( "../../Data/CH11/ex11-18.txt", header=TRUE, quote="'" )
DF$Speed = as.factor( DF$Speed )
DF$Formulation = as.factor( DF$Formulation )

a = aov(Yield ~ Speed * Formulation, data=DF)
summary(a)

# Get estimates of alpha_hat_i and beta_hat_j (using the sum of squares data):
#
res = TWO_FACTOR_KIJ_ANOVA( DF$Yield, DF$Speed, DF$Formulation )
print( res )

# You can check the above code using the books values of the above numbers:
#
SSSpeed = 230.81 # =SSA
SSForm = 2253.44 # =SSB
SSFormSpeed = 18.58 # =SSAB
SSE = 71.87 # =SSE
SST = SSForm + SSSpeed + SSFormSpeed + SSE # =SST

residuals = c( .23, -.87, .63, 4.50, -1.20, -3.30, -2.03, 1.97, .07, -1.10, -.30, 1.40, .67, -1.23, .57, -3.43, -.13, 3.57 )

if( FALSE ){
    # Not working:
    print( coef( lm( Yield ~ Speed * Formulation, data=DF, contrasts="contr.sum" ) ) )
}

res = residuals(a)
qqnorm( res )
qqline( res )


# Ex 11.19
#
DF = read.csv( "../../Data/CH11/ex11-19.txt", header=TRUE, quote="'" )

a = aov( Response ~ NaOH.Conc. * Type.of.Coal, data=DF )
summary(a)

res = TWO_FACTOR_KIJ_ANOVA( DF$Response, DF$NaOH.Conc., DF$Type.of.Coal )
print( res ) 
    
q_crit = qtukey( 1-0.01, res$J, res$I*res$J*(res$K-1) )
ss = sqrt( res$MSE/(res$I*res$K) )
ws = q_crit * ss

print( diff( sort( res$xbar_dot_j_dot) ) )


# Ex 11.20: TODO
#
# This does not seem to be the data referred to by the problem I'm looking at:
# 
# DF = read.csv( "../../Data/CH11/ex11-20.txt", header=TRUE, quote="'" ) 

brightness=c( 280, 290, 285, 300, 310, 295, 270, 285, 290, 230, 235, 240, 260, 240, 235, 220, 225, 230 )
phosphor.type = rep( c(1,2,3), times=2, each=3 )
glass.type = rep( c(1,2), times=1, each=9 )
DF = data.frame( brightness=brightness, phosphor.type=phosphor.type, glass.type=glass.type )

res = TWO_FACTOR_KIJ_ANOVA( DF$brightness, DF$glass.type, DF$phosphor.type )


# Ex 11.21
#
I = 3
J = 5
K = 2

SSA = 22941.80
SSB = 22765.53
SSE = 15253.50
SST = 64954.70
SSAB = SST - SSA - SSB - SSE

MST = SST / (I*J*K-1)
MSA = SSA / (I-1)
MSB = SSB / (J-1)
MSAB = SSAB / ( (I-1)*(J-1) )
MSE = SSE / ( I*J*(K-1) )

f_AB = MSAB / MSE
f_A = MSA / MSAB # these change from the case where both effects are fixed
f_B = MSB / MSAB

p_value_A = 1 - pf( f_A, I-1, (I-1)*(J-1) ) # the d.o.f in the denominator is different from the case where both effects are fixed
p_value_B = 1 - pf( f_B, J-1, (I-1)*(J-1) )
p_value_AB = 1 - pf( f_AB, (I-1)*(J-1), I*J*(K-1) )

print( c(f_A, f_B, f_AB) )
print( c(p_value_A, p_value_B, p_value_AB) )
 

# Ex 11.22
#
DF = read.csv( "../../Data/CH11/ex11-22.txt", header=TRUE, quote="'" )
DF$Pen = as.factor( DF$Pen )
DF$Surface = as.factor( DF$Surface )

# Not all of these statistics (i.e. p-values) are correct under the mixed effects model but
# many are helpful to have extracted for us:
#
res = TWO_FACTOR_KIJ_ANOVA( DF$Response, DF$Pen, DF$Surface )

# Correct the values of f_A, f_B, p_value_A, and p_value_B:
#
f_AB = res$MSAB / res$MSE
f_A = res$MSA / res$MSAB # these two denominators are different due to the mixed effect
f_B = res$MSB / res$MSAB

I = res$I
J = res$J
K = res$K
p_value_A = 1 - pf( f_A, I-1, (I-1)*(J-1) ) # the d.o.f in the denominator is different from the case where both effects are fixed
p_value_B = 1 - pf( f_B, J-1, (I-1)*(J-1) )
p_value_AB = 1 - pf( f_AB, (I-1)*(J-1), I*J*(K-1) )

print( c(f_A, f_B, f_AB) )
print( c(p_value_A, p_value_B, p_value_AB) )


# Ex 11.23
#
DF = read.csv( "../../Data/CH11/ex11-23.txt", header=TRUE, quote="'" )

I = 3
J = 5
K = 3

sum_xijk2 = 16815853
sum_xijdot2 = 50443409

DF$MeanResponse = DF$Response/K # the mean response in each cell

xbar_dot_dot_dot = sum( DF$Response ) / ( I * J * K )

SSE = sum_xijk2 - K * sum( DF$MeanResponse^2 )
xbar_dot_dot_dot = sum( DF$Response ) / ( I * J * K )
SST = sum_xijk2 - I * J * K * xbar_dot_dot_dot^2

ugroup_A = unique( DF$Material )
ugroup_B = unique( DF$Batch )

xbar_i_dot_dot = rep( NA, I )
for( gi in 1:I ){
    group_A_data = DF$Response[ DF$Material==ugroup_A[gi] ]
    xbar_i_dot_dot[gi] = sum( group_A_data ) / ( J * K )
}

xbar_dot_j_dot = rep( NA, J )
for( gj in 1:J ){
    group_B_data = DF$Response[ DF$Batch==ugroup_B[gj] ]
    xbar_dot_j_dot[gj] = sum( group_B_data ) / ( I * K )
}

SSA = J * K * sum( ( xbar_i_dot_dot - xbar_dot_dot_dot )^2 )

SSB = I * K * sum( ( xbar_dot_j_dot - xbar_dot_dot_dot )^2 )

SSAB = SST - SSA - SSB - SSE

# Compute the mean square errors:
#
MSAB = SSAB / ( (I-1)*(J-1) )
MSA = SSA / (I-1)
MSB = SSB / (J-1)
MSE = SSE / ( I*J*(K-1) )

f_AB = MSAB / MSE
f_A = MSA / MSAB # these change from the case where both effects are
f_B = MSB / MSAB

p_value_A = 1 - pf( f_A, I-1, (I-1)*(J-1) ) # the d.o.f in the denominator is different from the case where both effects are fixed
p_value_B = 1 - pf( f_B, J-1, (I-1)*(J-1) )
p_value_AB = 1 - pf( f_AB, (I-1)*(J-1), I*J*(K-1) )

print( c(f_A, f_B, f_AB) )
print( c(p_value_A, p_value_B, p_value_AB) )


# Ex 11.25:
#
DF = read.csv( "../../Data/CH11/ex11-19.txt", header=TRUE, quote="'" ) 
res = TWO_FACTOR_KIJ_ANOVA( DF$Response, DF$NaOH.Conc., DF $Type.of.Coal )

theta_hat = res$xbar_i_dot_dot[2] - res$xbar_i_dot_dot[3] # the difference alpha_i - alpha_{i'}
var_theta_hat = 2 * res$MSE / ( res$J * res$K )
df = res$I * res$J * ( res$K - 1 )

alpha = 0.05
t_crit = qt( 1 - alpha/2, df )
ci = theta_hat + sqrt( var_theta_hat ) * t_crit * c(-1,+1)
print( ci )