#
# Written by:
# --
# John L. Weatherwax                2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
#-----
#
# Worked solutions for exercises 1.33 - 1.43: measures of location (mean,
# median, trimmed mean) computed on small data sets.  Exercises that read
# from ../../Data/CH01/ expect comma separated files with a header row.

# Ex. 1.33:
#
# DF = read.csv( "../../Data/CH01/ex01-33.txt", header=TRUE, quote="'" ) # this does not seem to be the correct data
#
data <- c(244, 191, 160, 187, 180, 176, 174, 205, 211, 183, 211, 180, 194, 200)
print(sprintf("original data: mean: %10.3f; median= %10.3f", mean(data), median(data)))

# Change the first observation:
#
data[1] <- 204
print(sprintf("changed first observation: mean: %10.3f; median: %10.3f", mean(data), median(data)))

# Remove the smallest and largest value:
#
data <- c(244, 191, 160, 187, 180, 176, 174, 205, 211, 183, 211, 180, 194, 200)
# Keep only the observations strictly between the extremes.
data <- data[data < max(data) & data > min(data)]
print(sprintf("dropped min/max: mean: %10.3f; median: %10.3f", mean(data), median(data)))

print((13 * 119.8 + 159) / 14)

# Ex. 1.34:
#
DF <- read.csv("../../Data/CH01/ex01-34.txt", header = TRUE, quote = "'")

# Split the dust measurements by the urban ("U") / farm ("F") indicator.
mask_urban <- DF$Urban.Farm == "U"
n_urban <- sum(mask_urban)
urban <- DF$dust..EU.mg.[mask_urban]

mask_farm <- DF$Urban.Farm == "F" # full column name; do not rely on `$` partial matching
n_farm <- sum(mask_farm)
farm <- DF$dust..EU.mg.[mask_farm]

mean_U <- mean(urban)
mean_F <- mean(farm)
print(sprintf("urban mean: %10.6f; farm mean: %10.6f", mean_U, mean_F))

median_U <- median(urban)
median_F <- median(farm)
print(sprintf("urban median: %10.6f; farm median: %10.6f", median_U, median_F))

# A trim fraction of 1/n is intended to drop one observation from each end.
tmean_U <- mean(urban, trim = (1 / n_urban))
tmean_F <- mean(farm, trim = (1 / n_farm))
print(sprintf("urban trimmed mean: %10.6f; farm trimmed mean: %10.6f", tmean_U, tmean_F))

# Ex. 1.35:
#
DF <- read.csv("../../Data/CH01/ex01-35.txt", header = TRUE, quote = "'")

d <- DF$Pressure..psi. # our data
n <- length(d)
print(c(mean(d), median(d), mean(d, trim = 0.125)))

# The two order statistics around the middle of the sorted sample.
ds <- sort(d)
print(c(ds[n / 2], ds[n / 2 + 1]))

# Ex. 1.36:
#
DF <- read.csv("../../Data/CH01/ex01-36.txt", header = TRUE, quote = "'")

d <- DF$escape..sec. # our data
stem(d)
print(c(mean(d), median(d)))

n <- length(d)
ds <- sort(d) # named `ds` rather than `sd` so stats::sd is not masked
print(c(ds[n / 2], ds[n / 2 + 1]))

time_in_mins <- d / 60 # a simple scaling

# Ex. 1.37:
#
DF <- read.csv("../../Data/CH01/ex01-37.txt", header = TRUE, quote = "'")

d <- DF$Snow_cover # our data
n <- length(d)
stem(d)
print(mean(d))

# Ex. 1.38:
#
DF <- read.csv("../../Data/CH01/ex01-38.txt", header = TRUE, quote = "'")

# Round the above data into the 5mmHg bins:
#
round_units <- seq(105, 140, by = 5)
# Bin edges sit 2.5 either side of each rounding unit; these are the bins to
# use in the hist command.
hist_breaks <- c(round_units[1] - 5 / 2, round_units + 5 / 2)
h <- hist(DF$Blood_Pres..mmHg., breaks = hist_breaks, plot = FALSE)

# The rounded values are then given by repeating each bin centre once per
# observation counted in that bin:
rounded_values <- rep(round_units, times = h$counts)
print(rounded_values)
print(median(rounded_values))

# Ex. 1.39:
#
DF <- read.csv("../../Data/CH01/ex01-39.txt", header = TRUE, quote = "'")

print(c(mean(DF$P_Lives), median(DF$P_Lives)))
s <- sort(DF$P_Lives)
n <- length(s)
print(c(s[n / 2], s[n / 2 + 1]))

# Ex. 1.40:
#
# Reads the ex01-27 data file.
DF <- read.csv("../../Data/CH01/ex01-27.txt", header = TRUE, quote = "'")

d <- DF$lifetime # our data
print(c(median(d), mean(d, trim = 0.25), mean(d, trim = 0.1), mean(d)))

# Ex. 1.41:
#
d <- c("S", "S", "F", "S", "S", "S", "F", "F", "S", "S") # our data
p_hat <- sum(d == "S") / length(d)

# Recode S -> 1 and F -> 0; the mean of the recoded sample equals p_hat.
d_copy <- as.double(d == "S")
print(c(p_hat, mean(d_copy)))

25 * 0.8 - 7

# Ex. 1.43:
#
d <- c(48, 79, 100, 35, 92, 86, 57, 100, 17, 29) # replace the 100+ with just 100
n <- length(d)
ds <- sort(d)
print(c(ds[n / 2], ds[n / 2 + 1]))