# # Written by: # -- # John L. Weatherwax 2009-04-21 # # email: wax@alum.mit.edu # # Please send comments and especially bug reports to the # above email address. # # Utility function for Chapter 10 # #----- ANOVA = function(data, group){ # # ANOVA assuming equal sample sizes. # ugroup = unique( group ) I = length( ugroup ) # all groups must have the same sumber of samples (as the first one): J = sum( group==ugroup[1] ) x_i_dot = rep( NA, I ) xbar_i_dot = rep( NA, I ) # the mean for the samples in the group for( gi in 1:I ){ group_data = data[ group==ugroup[gi] ] stopifnot( length(group_data)==J ) x_i_dot[gi] = sum( group_data ) xbar_i_dot[gi] = x_i_dot[gi] / J } x_dot_dot = sum(x_i_dot) SST = sum( data^2 ) - x_dot_dot^2 / ( I * J ) SSTr = sum( x_i_dot^2 ) / J - x_dot_dot^2 / ( I * J ) SSE = SST - SSTr MSTr = SSTr / (I-1) MSE = SSE / ( I*(J-1) ) f = MSTr / MSE p_value = 1 - pf( f, I-1, I*(J-1) ) # return everything as a list: list( I=I, J=J, x_i_dot=x_i_dot, xbar_i_dot=xbar_i_dot, MSTr=MSTr, MSE=MSE, f=f, p_value=p_value ) } ANOVA_USS = function(data, group){ # # ANOVA assuming unequal sample sizes. # ugroup = unique( group ) I = length( ugroup ) Js = rep( NA, I ) # groups could have different sample sizes x_i_dot = rep( NA, I ) # the sum of the samples in the group xbar_i_dot = rep( NA, I ) # the mean for the samples in the group ss = rep( NA, I ) # the standard deviation of the samples in group for( gi in 1:I ){ group_data = data[ group==ugroup[gi] ] Js[gi] = length(group_data) x_i_dot[gi] = sum( group_data ) xbar_i_dot[gi] = x_i_dot[gi] / Js[gi] # compute the group mean ss[gi] = sd(group_data) } x_dot_dot = sum(x_i_dot) n = length(data) SST = sum( data^2 ) - x_dot_dot^2 / n SSTr = sum( x_i_dot^2 / Js ) - x_dot_dot^2 / n SSE = SST - SSTr MSTr = SSTr / (I-1) MSE = SSE / ( n-I ) f = MSTr / MSE p_value = 1 - pf( f, I-1, n-I ) # return everything as a list: list( I=I, Js=Js, n=n, x_i_dot=x_i_dot, xbar_i_dot=xbar_i_dot, ss=ss, MSTr=MSTr, MSE=MSE, f=f, p_value=p_value ) } mean_diff_CI_utils = function(x_i_dot, Js, MSE, q_alpha=0.05){ # # Compute some expressions that help determine if two means are significantly different. # # Inputs: # x_i_dot = the group sums # Js = the number of samples summed in each group # #----- xbar_i_dot = x_i_dot / Js # compute the group means xbar_i_dot = sort( xbar_i_dot ) # sorted in increasing order # This is a matrix of the differences between each mean: # mean_differences = outer( xbar_i_dot, xbar_i_dot, "-" ) I = length(Js) n = sum(Js) q_crit = qtukey( 1-q_alpha, I, n-I ) vs = (MSE/2)* outer( 1/Js, 1/Js, "+" ) # This is a matrix with entry w_ij such that if the distance between the means mu_i - mu_j is not # significantly larger than this value the observed difference is insignificant. # ws = q_crit * sqrt( vs ) # take only 1/2 of this matrix (the top or the bottom) list( sorted_means=xbar_i_dot, mean_differences=mean_differences, ws=ws ) }