#
# Written by:
# --
# John L. Weatherwax                2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
#-----
#
# Worked solutions for exercises 1.44 - 1.60.  Each section loads its own
# data (or types it in), so the global names DF, d, n, ... are deliberately
# re-used from one exercise to the next.  The helper fourth_spread() is not
# defined in this file; it is expected to come from utils.R, which is sourced
# in Ex 1.48 before its first use.
#

# Ex 1.44:
#
DF <- read.csv("../../Data/CH01/ex01-44.txt", header = TRUE, quote = "'")
d <- DF$O2_consump

print(range(d))

# Sample variance from the defining formula, then the standard deviation.
n <- length(d)
m <- mean(d)
s2 <- sum((d - m)^2) / (n - 1)
print(s2)
print(sqrt(s2))

# s^2 with the short cut method:
#
SXX <- sum(d^2) - sum(d)^2 / n
print(SXX / (n - 1))


# Ex 1.45:
#
DF <- read.csv("../../Data/CH01/ex01-45.txt", header = TRUE, quote = "'")
d <- DF$Gpa

n <- length(d)
xbar <- mean(d)
deviations <- d - xbar
s2 <- sum(deviations^2) / (n - 1)
# print() explicitly so the value also shows up when this file is source()d.
print(sqrt(s2))

# s^2 with the short cut method:
#
SXX <- sum(d^2) - sum(d)^2 / n
print(SXX / (n - 1))

# Same short cut computation after subtracting the constant 100 from every
# observation (a shift leaves the sample variance unchanged).
d_trans <- d - 100
SXX <- sum(d_trans^2) - sum(d_trans)^2 / n
print(SXX / (n - 1))


# Ex 1.46:
#
DF <- read.csv("../../Data/CH01/ex01-46.txt", header = TRUE, quote = "'")
d <- DF$viscosity..cP.

n <- length(d)
print(c(mean(d), median(d)))

SXX <- sum(d^2) - sum(d)^2 / n
print(SXX / (n - 1))


# Ex 1.47:
#
d <- c(87, 93, 96, 98, 105, 114, 128, 131, 142, 168)
print(c(median(d), mean(d), sd(d)))


# Ex 1.48:
#
DF <- read.csv("../../Data/CH01/ex01-34.txt", header = TRUE, quote = "'")

# Split the dust measurements by the Urban.Farm label ('U' / 'F').
mask_urban <- DF$Urban.Farm == "U"
n_urban <- sum(mask_urban)
urban <- DF$dust..EU.mg.[mask_urban]

mask_farm <- DF$Urban.Farm == "F"
n_farm <- sum(mask_farm)
farm <- DF$dust..EU.mg.[mask_farm]

print(sprintf("urban sd= %10.6f; farm sd= %10.6f", sd(urban), sd(farm)))

# Provides fourth_spread(), used here and in several later exercises.
source("utils.R")

print(sprintf(
  "urban fourth spread= %10.6f; farm fourth spread= %10.6f",
  fourth_spread(urban), fourth_spread(farm)
))

DF <- read.csv("../../Data/CH01/ex01-48.txt", header = TRUE, quote = "'") # holds all the endotoxin measurements
boxplot(endotoxin ~ Urban.Farm, data = DF)


# Ex. 1.49:
#
DF <- read.csv("../../Data/CH01/ex01-49.txt", header = TRUE, quote = "'")
d <- DF$S_lamina..mm2.

n <- length(d)
sum_xi <- sum(d)
sum_xi2 <- sum(d^2)
print(c(sum_xi, sum_xi2))

SXX <- sum_xi2 - sum_xi^2 / n
s2 <- SXX / (n - 1)
s <- sqrt(s2)
print(c(s2, s))


# Ex. 1.50:
#
DF <- read.csv("../../Data/CH01/ex01-50.txt", header = TRUE, quote = "'")
d <- DF$awards

m <- mean(d)
s <- sd(d)
print(m + 2 * s)


# Ex. 1.51:
#
DF <- read.csv("../../Data/CH01/ex01-51.txt", header = TRUE, quote = "'")
d <- DF$time..min.

print(c(var(d), sd(d)))


# Ex. 1.52:
#
d <- c(0.3, 0.9, 1.0, 1.3)
n <- 5
# Appending minus the sum of the four values makes the five values sum to
# zero; adding mu then gives five numbers whose mean is mu.
s <- sum(d)
new_d <- c(d, -s)
mu <- 1
print(mu + new_d)


# Ex. 1.53:
#
DF <- read.csv("../../Data/CH01/ex01-49.txt", header = TRUE, quote = "'")
d <- DF$S_lamina..mm2.

x_tilde <- median(d)
lower_fourth <- median(d[d <= x_tilde])
upper_fourth <- median(d[d >= x_tilde])
print(c(lower_fourth, upper_fourth, fourth_spread(d)))

# Named sorted_d (not sd) so that stats::sd is not shadowed.
sorted_d <- sort(d)
n <- length(d)
median_location <- (n + 1) / 2

# The data in the lower half:
#
# floor() makes explicit the truncation that 1:median_location performed
# implicitly when n is even; the same elements are selected.
lower_half <- sorted_d[seq_len(floor(median_location))]
print(lower_half)
print(lower_half[5])

# Fourth spread after appending one more observation (4.6).
print(fourth_spread(c(d, 4.6)))


# Ex. 1.54:
#
# DF = read.csv( "../../Data/CH01/ex01-54.txt", header=TRUE, quote="'" ) # this does not seem to be the same data as in the text
# d = DF$Strength..N.
#
d <- c(22.2, 40.4, 16.4, 73.7, 36.6, 109.9, 30.0, 4.4, 33.1, 66.7, 81.5)

x_tilde <- median(d)
lower_fourth <- median(d[d <= x_tilde])
upper_fourth <- median(d[d >= x_tilde])
f_s <- fourth_spread(d)
print(c(lower_fourth, upper_fourth, f_s))

boxplot(d)

print(c(lower_fourth - 1.5 * f_s, upper_fourth + 1.5 * f_s)) # mild outlier fences
print(c(lower_fourth - 3.0 * f_s, upper_fourth + 3.0 * f_s)) # extreme outlier fences
print(range(d))


# Ex. 1.55:
#
DF <- read.csv("../../Data/CH01/ex01-36.txt", header = TRUE, quote = "'")
d <- DF$escape..sec.

x_tilde <- median(d)
lower_fourth <- median(d[d <= x_tilde])
upper_fourth <- median(d[d >= x_tilde])
f_s <- fourth_spread(d)
print(c(lower_fourth, upper_fourth, f_s))

print(c(lower_fourth - 1.5 * f_s, upper_fourth + 1.5 * f_s)) # mild outliers
print(c(lower_fourth - 3.0 * f_s, upper_fourth + 3.0 * f_s)) # extreme outliers

boxplot(d)


# Ex. 1.56:
#
# DF = read.csv( "../../Data/CH01/ex01-56.txt", header=TRUE, quote="'" ) # this does not seem to be the data for this problem
# d = DF$alcohol..
d <- c(
  30, 30, 60, 63, 70, 79, 87, 90, 101, 102, 115, 118, 119,
  119, 120, 125, 140, 145, 172, 182, 183, 191, 222, 244, 291, 511
)

boxplot(d)


# Ex. 1.57:
#
lower_fourth <- 196.0
upper_fourth <- 216.8
f_s <- upper_fourth - lower_fourth

print(c(lower_fourth - 1.5 * f_s, upper_fourth + 1.5 * f_s)) # mild outliers
print(c(lower_fourth - 3 * f_s, upper_fourth + 3 * f_s)) # extreme outliers


# Ex. 1.59:
#
DF <- read.csv("../../Data/CH01/ex01-59.txt", header = TRUE, quote = "'")

# For each overdose type report the fourths, the fourth spread and how many
# observations fall outside the mild (1.5 f_s) and extreme (3 f_s) fences.
for (type in c("ED", "Non")) {
  mask <- DF$Overdose.Type == type
  d <- DF[mask, ]$Cocaine..mg.L.

  x_tilde <- median(d)
  lower_fourth <- median(d[d <= x_tilde])
  upper_fourth <- median(d[d >= x_tilde])
  f_s <- fourth_spread(d)
  print(sprintf(
    "Type: %6s, lower_fourth= %5.3f, upper_fourth= %5.3f, fourth_spread= %5.3f",
    type, lower_fourth, upper_fourth, f_s
  ))

  # The lower fence hangs below the LOWER fourth and the upper fence sits
  # above the UPPER fourth.  (The upper fence used to be measured from the
  # lower fourth, which put it too low and over-counted "too large" values.)
  # Comparisons are strict so the counts agree with the "less than" /
  # "larger than" wording of the messages.
  lower_fence <- lower_fourth - 1.5 * f_s
  upper_fence <- upper_fourth + 1.5 * f_s
  small_mask <- d < lower_fence
  large_mask <- d > upper_fence
  print(sprintf(
    "Mild outliers: %2d too small (less than %5.3f), %2d too large (larger than %5.3f)",
    sum(small_mask), lower_fence, sum(large_mask), upper_fence
  ))

  lower_fence <- lower_fourth - 3.0 * f_s
  upper_fence <- upper_fourth + 3.0 * f_s
  small_mask <- d < lower_fence
  large_mask <- d > upper_fence
  print(sprintf(
    "Extreme outliers: %2d too small (less than %5.3f), %2d too large (larger than %5.3f)",
    sum(small_mask), lower_fence, sum(large_mask), upper_fence
  ))
}

# Produce a boxplot
#
boxplot(Cocaine..mg.L. ~ Overdose.Type, data = DF)


# Ex. 1.60:
#
DF <- read.csv("../../Data/CH01/ex01-60.txt", header = TRUE, quote = "'")

# Produce a boxplot
#
boxplot(burst_strength..lb.in2. ~ welds._type, data = DF)