# # Written by: # -- # John L. Weatherwax 2009-04-21 # # email: wax@alum.mit.edu # # Please send comments and especially bug reports to the # above email address. # #----- source('utils.R') # Ex. 1.62 # n = 4 x_bar = 76831 r1 = 4*x_bar - 76682 - 77048 # x + y = r1 r2 = 3 * 180^2 - ( (76682-x_bar)^2 + (77048-x_bar)^2 ) # y_sols = polyroot( c( (r1-x_bar)^2 + x_bar^2 - r2, -2*r1, 2 ) ) x_sols = r1 - y_sols # Ex. 1.63: # # DF = read.csv( "../../Data/CHO1.ex01-63.txt", header=TRUE, quote="'" ) # this does not look like the data given in this exercise # d = c( 6.3, 6.4, 7.7, 8.4, 8.5, 8.8, 8.9, 9.0, 9.1, 10.0, 10.1, 10.2, 10.6, 10.6, 10.7, 10.7, 10.8, 10.9, 11.1, 11.2, 11.2, 11.4, 11.9, 11.9, 12.2, 13.1 ) boxplot( d ) x_tilde = median( d ) lower_fourth = median( d[ d<=x_tilde ] ) upper_fourth = median( d[ d>=x_tilde ] ) f_s = fourth_spread(d) print( sprintf( "mean: %5.3f, median: %5.3f", mean(d), x_tilde ) ) print( sprintf( "lower_fourth= %5.3f, upper_fourth= %5.3f, fourth_spread= %5.3f", lower_fourth, upper_fourth, f_s ) ) # Ex. 1.64: # #DF = read.csv( "../../Data/CH01/ex01-64.txt", header=TRUE, quote="'" ) # this does not look like the data given in this exercise # DF = data.frame( emission=c( 13.8, 18.3, 32.2, 32.5, 118, 149, 232, 36 ), type=c( rep( 'HC', 4 ), rep( 'CO', 4 ) ) ) boxplot( emission ~ type, data=DF ) hc_emission = DF[ DF$type == 'HC', ]$emission co_emission = DF[ DF$type == 'CO', ]$emission print( sprintf("HC sd= %6.3f; CO sd= %6.3f", sd(hc_emission), sd(co_emission)) ) print( sprintf("HC sd/x_bar= %6.3f; CO sd/x_bar= %6.3f", sd(hc_emission) / mean(hc_emission) , sd(co_emission)/mean(co_emission)) ) # Ex. 1.65: # d = c( 6, 7, 17, 30, 43, 28, 22, 13, 3 ) p1 = sum(d[3:length(d)]) / sum(d) p2 = sum(d[1:7]) / sum(d) p3 = ( sum(d[1:4]) + 0.5*43 ) / sum(d) print( c( p1, p2, p3 ) ) # Ex. 1.66: # DF = read.csv( "../../Data/CH01/ex01-66.txt", header=TRUE, quote="'" ) initial_DF = data.frame( concentration=c( DF$Init.Se, DF$Init.Con[1:14] ), type=c( rep( 'supplement', 16 ), rep( 'control', 14 ) ) ) boxplot( concentration ~ type, data=initial_DF ) final_DF = data.frame( concentration=c( DF$Final.Se, DF$Final.Co[1:14] ), type=c( rep( 'supplement', 16 ), rep( 'control', 14 ) ) ) boxplot( concentration ~ type, data=final_DF ) # Ex. 1.67: # DF = read.csv( "../../Data/CH01/ex01-67.txt", header=TRUE, quote="'" ) boxplot( diameter..cm. ~ Gender, data=DF ) male = DF[ DF['Gender'] == 'M', ]$diameter..cm. female = DF[ DF['Gender'] == 'F', ]$diameter..cm. print( c(mean(male), mean(male, trim=0.1)) ) print( c(mean(female), mean(female, trim=0.1)) ) # Ex. 1.70: # DF = read.csv( "../../Data/CH01/ex01-70.txt", header=TRUE, quote="'" ) DF_combined = data.frame( measurement=c( DF$Weight, DF$Treadmil ), type=c( rep( 'weight', length(DF$Weight) ), rep( 'treadmil', length(DF$Treadmil) ) ) ) boxplot( measurement ~ type, data=DF_combined ) d = DF$Weight - DF$Treadmil boxplot( d ) # Ex. 1.72: # DF = read.csv( "../../Data/CH01/ex01-72.txt", header=TRUE, quote="'" ) boxplot( volume ~ Health_status, data=DF ) # Ex. 1.73/1.74: # DF = read.csv( "../../Data/CH01/ex01-73.txt", header=TRUE, quote="'" ) stem( DF$cadence ) boxplot( DF$cadence ) # Get mode of the cadence data: # my_mode <- function(x) { ux <- unique(x) ux[which.max(tabulate(match(x, ux)))] } print( my_mode(DF$cadence) ) # Ex. 1.75: # DF = read.csv( "../../Data/CH01/ex01-75.txt", header=TRUE, quote="'" ) boxplot( Fatigue_limit..Mpa. ~ Types.of.Wire, data=DF ) # Apply the median function in each group: # by( DF$Fatigue_limit..Mpa., DF$Types.of.Wire, median ) # Ex. 1.77: # DF = read.csv( "../../Data/CH01/ex01-77.txt", header=TRUE, quote="'" ) d = DF$pitting_depth..mm. stem( d ) hist( d, breaks=cumsum( c( 0, 2, 2, 2, 4, 10, 10 ) ) ) # Ex 1.79 # n = 15 x_bar_n = 12.58 s_n_2 = .512^2 x_16 = 11.8 x_bar_np1 = ( 1/(n+1) ) * ( n * x_bar_n + x_16 ) s_np1_2 = ( (n-1) * s_n_2 + (n/(n+1)) * ( ( x_16 - x_bar_n )^2 ) ) / n print( c( x_bar_np1, sqrt(s_np1_2) ) ) # Ex 1.80 # DF = read.csv( "../../Data/CH01/ex01-80.txt", header=TRUE, quote="'" ) d = DF$Frequenc p1 = sum( d[1:7] ) / sum(d) p2 = sum( d[13:15] ) / sum(d) print( c(p1, p2) ) # Ex 1.82 # d = c( 47, 54, 53, 50, 46, 46, 47, 50, 51, 50, 46, 52, 50, 50 ) exponential_smoothing = function(x, alpha=0.5){ smooth = c( x[1] ) for( ii in 2:length(x) ){ smooth = c( smooth, alpha*x[ii] + (1-alpha)*smooth[ii-1] ) } smooth } d_smooth_p1 = exponential_smoothing(d, alpha=0.1) d_smooth_p5 = exponential_smoothing(d, alpha=0.5) plot( d, type='b', col='black', pch=19 ) lines( d_smooth_p1, type='l', col='blue', pch=1 ) lines( d_smooth_p5, type='l', col='red', pch=1 ) legend( 'topright', c('raw data', '0.1 ema', '0.5 ema'), cex=1.0, lwd=2, bty='n', lty=c(1,1,1), col=c('black', 'blue', 'red'), pch=c(19, NA, NA) ) # Ex. 1.83: # DF = read.csv( "../../Data/CH01/ex01-83.txt", header=TRUE, quote="'" ) y = DF$rainfall..acre.feet. n = length(y) y_tilde = median(y) y_sorted = sort(y) paired_data_x = c() paired_data_y = c() for( ii in 1:floor(n/2) ){ end_ii = n-(ii-1) paired_data_x = c( paired_data_x, y_sorted[end_ii] - y_tilde ) paired_data_y = c( paired_data_y, y_tilde - y_sorted[ii] ) } plot( paired_data_x, paired_data_y, col='black' ) grid() abline( 0, 1 )