#
# Written by:
# --
# John L. Weatherwax                2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
#-----

# Return the most frequent value of `x`.
#
# x:     a vector.
# na.rm: if TRUE, NA entries are dropped from `x` before counting.
#
# Each element of `x` is mapped to its position in unique(x) and the
# positions are counted with tabulate(). Ties go to the value that appears
# first in `x`, because which.max() returns the first maximum.
Mode <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

DF <- read.csv("../../Data/chap_1_prob_12.csv")

# Generate data that will have the properties of the given binning:
#
n_samples <- sum(DF$Number.of.employees) # the number of samples we need to draw

# Each entry of DF$Wage is split on "-" into a (left edge, right edge) pair.
wage_range <- strsplit(as.character(DF$Wage), "-")

# Draw uniformly inside every bin, one bin per row of DF, in row order.
# seq_len() yields an empty sequence when DF has no rows (1:n would give
# c(1, 0)), and collecting the per-bin draws in a list avoids re-copying the
# growing result vector on each iteration.
set.seed(1234)
wage_draws <- lapply(seq_len(nrow(DF)), function(ii) {
  left_edge <- as.integer(wage_range[[ii]][1])
  right_edge <- as.integer(wage_range[[ii]][2])
  runif(DF$Number.of.employees[ii], min = left_edge, max = right_edge)
})
wage_data <- unlist(wage_draws)

# Plot a histogram and a cumulative frequency distribution for this data:
#
#postscript("../../WriteUp/Graphics/Chapter1/ex_12_density_plots.eps", onefile=FALSE, horizontal=FALSE)
par(mfrow = c(1, 2))
hist(wage_data)
plot(ecdf(wage_data), main = "Salary")
par(mfrow = c(1, 1))
#dev.off()

# Compute statistics:
#
# NOTE(review): wage_data consists of continuous runif() draws, so repeated
# values are extremely unlikely and Mode() will almost surely just return the
# first sample. A modal-class estimate taken from DF may be what is wanted
# here -- confirm before relying on the third entry.
stats <- c(mean(wage_data), median(wage_data), Mode(wage_data))
print(stats)