# # Here we attempt to duplicate the Wald-Wolfowitz Hotelling's T2 from Page 158 in the book. # lines = 'ID BC Albumin Uric_Acid 2381 N 43 54 1610 N 41 33 1149 N 39 50 2271 N 42 48 1946 Y 35 72 1797 Y 38 30 575 Y 40 46 39 Y 35 63' con = textConnection( lines ) DF = read.table( con, header=TRUE ) close(con) DF$BC = as.factor( DF$BC ) # Get the pooled feature means U: # U = colMeans( DF[,3:4] ) # Compute the matrix c: # AlbuminD = DF$Albumin - U[1] # this is X_i - U_i UricD = DF$Uric_Acid - U[2] XmU = data.frame( BC=DF$BC, AlbuminD=AlbuminD, UricD=UricD ) # X m(inus) U c = matrix( 0, nrow=2, ncol=2 ) for( ii in 1:2 ){ for( jj in 1:2 ){ c[ii,jj] = mean( XmU[,ii+1] * XmU[,jj+1] ) } } # Get the inverse of the matrix C (the same for every resampling): # c_inv = solve(c) # Lets now compute bootstrapped replications of The {T'}^2 statistic: # compute_Tprime = function( DF, c_inv ){ # Get the means in each class under the given ordering: # mask = DF$BC == 'N' u_N = colMeans( DF[mask,3:4] ) mask = DF$BC == 'Y' u_Y = colMeans( DF[mask,3:4] ) TP2 = t( u_N - u_Y ) %*% c_inv %*% (u_N - u_Y) return(as.double(TP2)) } T0 = compute_Tprime( DF, c_inv ) # Do bootstraps: # set.seed(1234) B = 200 # the number of bootstraps to run all_T2s = rep( NA, B ) # the bootstrap values of the T^2 for( bi in 1:B ){ DF$BC = sample(DF$BC) # reorder the labels all_T2s[bi] = compute_Tprime( DF, c_inv ) } plot( density( all_T2s ), xlab="statistic", main="" ) grid() abline(v=T0, col='red') alpha = sum( all_T2s >= T0 ) / B print( sprintf("Permutation Bootstrap (B= %d): prob. type I error: %10.6f", B, alpha) )