#
# Here we attempt to duplicate the Wald-Wolfowitz Hotelling's T2 from Page 158 in the book.
#
lines = 'ID BC Albumin Uric_Acid
2381 N 43 54
1610 N 41 33
1149 N 39 50
2271 N 42 48
1946 Y 35 72
1797 Y 38 30
575 Y 40 46
39 Y 35 63'
con = textConnection( lines )
DF = read.table( con, header=TRUE )
close(con)

DF$BC = as.factor( DF$BC )

# Get the pooled feature means U:
#
U = colMeans( DF[,3:4] )

# Compute the matrix c:
#
AlbuminD = DF$Albumin - U[1] # this is X_i - U_i
UricD = DF$Uric_Acid - U[2]

XmU = data.frame( BC=DF$BC, AlbuminD=AlbuminD, UricD=UricD ) # X m(inus) U

c = matrix( 0, nrow=2, ncol=2 )
for( ii in 1:2 ){
    for( jj in 1:2 ){
        c[ii,jj] = mean( XmU[,ii+1] * XmU[,jj+1] )
    }
}

# Get the inverse of the matrix C (the same for every resampling):
#
c_inv = solve(c)

# Lets now compute bootstrapped replications of The {T'}^2 statistic:
#
compute_Tprime = function( DF, c_inv ){
    # Get the means in each class under the given ordering:
    #
    mask = DF$BC == 'N'
    u_N = colMeans( DF[mask,3:4] )
    mask = DF$BC == 'Y'
    u_Y = colMeans( DF[mask,3:4] )
    TP2 = t( u_N - u_Y ) %*% c_inv %*% (u_N - u_Y)
    return(as.double(TP2))
}
T0 = compute_Tprime( DF, c_inv )

# Do bootstraps:
#
set.seed(1234)
B = 200 # the number of bootstraps to run
all_T2s = rep( NA, B ) # the bootstrap values of the T^2
for( bi in 1:B ){
    DF$BC = sample(DF$BC) # reorder the labels
    all_T2s[bi] = compute_Tprime( DF, c_inv )
}

plot( density( all_T2s ), xlab="statistic", main="" )
grid()
abline(v=T0, col='red')

alpha = sum( all_T2s >= T0 ) / B
print( sprintf("Permutation Bootstrap (B= %d): prob. type I error: %10.6f", B, alpha) )