#
# Written by:
# -- 
# John L. Weatherwax                2009-04-21
# 
# email: wax@alum.mit.edu
# 
# Please send comments and especially bug reports to the
# above email address.
#
#-----

# Return the most frequently occurring value of x (the statistical mode).
#
# x     : a vector of observations.
# na.rm : if TRUE, missing values are dropped before counting; if FALSE
#         (the default) NA is counted like any other value and can itself
#         be returned as the mode.
#
# When several values share the highest count, the one that appears first
# in x is returned.
Mode <- function(x, na.rm = FALSE) {
    if (na.rm) {
        x <- x[!is.na(x)]
    }
    distinct_vals <- unique(x)
    # Replace each observation by the position of its value among the
    # distinct values, then count how often each position occurs.
    positions <- match(x, distinct_vals)
    counts <- tabulate(positions)
    distinct_vals[which.max(counts)]
}

# L1-based measure of spread: the sum of absolute deviations of x from its
# mean, divided by (length(x) - 1).
#
# Note that, despite the name, this is not the raw L1 norm of x; it is an
# absolute-deviation analogue of sd(), using the same (n - 1) denominator.
#
# x     : a numeric vector.
# na.rm : if TRUE, missing values are dropped before the computation; if
#         FALSE (the default) any NA in x makes the result NA.
#
# Returns a single number. For a single observation the result is NaN,
# since the computation reduces to 0 / 0.
L1Norm <- function(x, na.rm = FALSE) {
    if (na.rm) {
        x <- x[!is.na(x)]
    }
    sum(abs(x - mean(x))) / (length(x) - 1)
}

# Fix the random seed so the bootstrap resamples are reproducible.
set.seed(1234)

DF <- read.csv("../../Data/sixth_grade_heights.csv")
n <- length(DF$Height)

# Bootstrap: draw B resamples of the heights (with replacement, each the same
# size as the original sample) and record the mean, median and mode of each.
#
B <- 100
all_means <- all_medians <- all_modes <- rep(NA, B)
for (bi in seq_len(B)) {
    b <- sample(DF$Height, size = n, replace = TRUE)
    all_means[bi] <- mean(b)
    all_medians[bi] <- median(b)
    all_modes[bi] <- Mode(b)
}

# Estimate the precision of each measure of central tendency from how much
# it varies across the bootstrap replicas.
#
# Precision measured by the standard deviation:
precision_sd <- c(sd(all_means), sd(all_medians), sd(all_modes))
# Precision measured by the L1 norm (see L1Norm):
precision_L1 <- c(L1Norm(all_means), L1Norm(all_medians), L1Norm(all_modes))

precision_DF <- data.frame(
    std = precision_sd,
    L1 = precision_L1,
    row.names = c("mean", "median", "mode")
)
print(precision_DF)

# For comparison, show the standard deviation and the L1 norm of the
# original (un-resampled) heights.
#
print("SD and L1Norm of the original sample")
print(c(sd(DF$Height), L1Norm(DF$Height)))