#
# Written by:
# -- 
# John L. Weatherwax                2009-04-21
# 
# email: wax@alum.mit.edu
# 
# Please send comments and especially bug reports to the
# above email address.
#
#-----

# Return the most frequently occurring value of `x`.
#
# x:     vector of values to tally.
# na.rm: when TRUE, NA entries are dropped before tallying; when FALSE (the
#        default) NA is counted like any other value.
#
# If several values tie for the highest count, the one that appears first in
# `x` is returned.
Mode <- function(x, na.rm = FALSE) {
    if (na.rm) {
        x <- x[!is.na(x)]
    }
    distinct_values <- unique(x)
    # Map every element to its position in `distinct_values`, then count how
    # often each position occurs.
    occurrences <- tabulate(match(x, distinct_values))
    distinct_values[which.max(occurrences)]
}

# Load the binned wage table.  Two columns are used below: `Wage`, a class
# interval written as "<low>-<high>", and `Number.of.employees`, the number of
# observations in that class.
DF <- read.csv("../../Data/chap_1_prob_12.csv")

# Generate data that will have the properties of the given binning:
#
n_samples <- sum(DF$Number.of.employees) # the number of samples we need to draw

# One character vector c(low, high) per class.
wage_range <- strsplit(as.character(DF$Wage), "-")
n <- nrow(DF)

set.seed(1234)

# Draw each class's samples uniformly between its two edges.  The classes are
# visited in table order, so the sequence of random draws after set.seed() is
# the same as drawing them one class after another in a loop.  seq_len() makes
# an empty table yield no draws instead of indexing a non-existent first row,
# and collecting the per-class vectors in a list avoids re-copying the result
# on every iteration.
class_draws <- lapply(seq_len(n), function(ii) {
    # as.numeric() keeps any fractional part of an edge; for integer-valued
    # edges it gives the same bounds as integer parsing would.
    left_edge <- as.numeric(wage_range[[ii]][1])
    right_edge <- as.numeric(wage_range[[ii]][2])
    ns <- DF$Number.of.employees[ii]
    runif(ns, min = left_edge, max = right_edge)
})
wage_data <- unlist(class_draws)

# Plot a histogram and a cumulative frequency distribution for this data:
#
#postscript("../../WriteUp/Graphics/Chapter1/ex_12_density_plots.eps", onefile=FALSE, horizontal=FALSE)
# Draw the histogram and the empirical CDF side by side.  par() returns the
# previous value of the parameter it changes, so keep it and restore exactly
# that layout afterwards rather than assuming it was a single panel.
old_par <- par(mfrow = c(1, 2))
hist(wage_data)
plot(ecdf(wage_data), main = "Salary")
par(old_par)
#dev.off()

# Compute statistics:
#
# Summarize the simulated wages as (mean, median, mode).
#
# The samples are continuous uniform draws, so essentially every value is
# distinct and a most-frequent-value mode would just return the first sample.
# Estimate the mode instead as the midpoint of the fullest histogram class
# (default hist() breaks; the first such class wins ties).
wage_hist <- hist(wage_data, plot = FALSE)
modal_value <- wage_hist$mids[which.max(wage_hist$counts)]
stats <- c(mean(wage_data), median(wage_data), modal_value)
print(stats)