# # EPage 493 # # Written by: # -- # John L. Weatherwax 2009-04-21 # # email: wax@alum.mit.edu # # Please send comments and especially bug reports to the # above email address. # #----- # Ex 12.68: # DF = read.csv( "../../Data/CH12/ex12-68.txt", header=TRUE, quote="'" ) plot( DF$Height, DF$Price ) m = lm( Price ~ Height, data=DF ) sm = summary(m) print(sm) predict( m, newdata=data.frame(Height=27), interval='prediction', level=0.95 ) # Ex 12.69: # n = length(DF$Height) alpha = 0.05 t_value = qt( 1 - 0.5 * alpha, n-2 ) beta_1_ci = m$coefficients[2] + c(-1,+1) * t_value * sm$coefficients[2,2] predict( m, newdata=data.frame(Height=25), interval='confidence', level=0.95 ) predict( m, newdata=data.frame(Height=25), interval='prediction', level=0.95 ) mean(DF$Height) sqrt( sm$r.squared ) # Ex 12.70: # DF = read.csv( "../../Data/CH12/ex12-70.txt", header=TRUE, quote="'" ) plot( DF$X.DAA, DF$Age ) # The first prediction interval approach: # m1 = lm( Age ~ X.DAA, data=DF ) predict( m1, newdata=data.frame(X.DAA=2.01), interval='prediction', level=0.95 ) # The second "inverse" approach: # x = Age # y = X.DAA # m2 = lm( X.DAA ~ Age, data=DF ) # solve for x_hat: # y_star = 2.01 x_hat = ( y_star - m2$coefficients[1] ) / m2$coefficients[2] # Compute the prediction interval of Age i.e. x_hat: # n = length(DF$Age) alpha = 0.05 t_value = qt( 1 - 0.5 * alpha, n-2 ) x_bar = mean( DF$Age ) S_xx = sum( DF$Age^2 ) - sum( DF$Age )^2 / n sm2 = summary(m2) s = sm2$sigma SE = ( s / m2$coefficients[2] ) * sqrt( 1 + 1/n + (x_hat - x_bar)^2 / S_xx ) PI = x_hat + c(-1,+1) * t_value * SE print(PI) # Ex 12.71: # DF = read.csv( "../../Data/CH12/ex12-71.txt", header=TRUE, quote="'" ) plot( DF$x, DF$y ) m = lm( y ~ x, data=DF ) sm = summary(m) print(sm) n = length(DF$x) alpha = 0.05 t_value = qt( 1 - 0.5 * alpha, n-2 ) ci = sm$coefficients[2] + c(-1,+1) * t_value * sm$coefficients[2,2] sqrt( sm$r.squared ) # Ex 12.72: # x = 35.5 326.976038 - 8.403964 * x sqrt( 0.9134 ) DF = read.csv( "../../Data/CH12/ex12-72.txt", header=TRUE, quote="'" ) range( DF$CO ) # Ex 12.73: # DF = read.csv( "../../Data/CH12/ex12-73.txt", header=TRUE, quote="'" ) plot( DF$x, DF$y ) sqrt( 0.5073 ) x = 50.0 y_hat = 0.787218 + 0.007570 * x n = length(DF$x) alpha = 0.05 t_value = qt( 1 - 0.5 * alpha, n-2 ) # Since we are told that we can use the data we compute the things we need for the standard error in y_hat: x_bar = mean( DF$x ) S_xx = sum( DF$x^2 ) - sum( DF$x )^2/n s = 0.20308 # from the SAS output s_y_hat = s * sqrt( 1/n + ( x - x_bar )^2 / S_xx ) y_hat + c(-1,+1) * t_value * s_y_hat x = 30.0 y_hat = 0.787218 + 0.007570 * x # Ex 12.74: # DF = read.csv( "../../Data/CH12/ex12-74.txt", header=TRUE, quote="'" ) plot( DF$CO, DF$NO3 ) m = lm( NO3 ~ CO, data=DF ) sm = summary(m) predict( m, newdata=data.frame(CO=400), interval='prediction', level=0.95 ) m2 = lm( NO3 ~ CO, data=DF[-9,] ) # drop the data that looks like it might be an outlier m$coefficients m2$coefficients # Ex 12.75: # # If we want to model: stride_rate ~ speed then x = speed and y = stride_rate # n = 11 sum_x = 205.4 sum_y = 35.16 sum_x2 = 3880.08 sum_y2 = 112.681 sum_xy = 660.130 beta_1 = ( sum_xy - sum_x * sum_y / n ) / ( sum_x2 - sum_x^2 / n ) beta_0 = sum_y / n - beta_1 * ( sum_x / n ) SST = sum_y2 - sum_y^2 / n SSE = sum_y2 - beta_0 * sum_y - beta_1 * sum_xy R2 = 1 - SSE / SST print( sprintf("beta_0= %10.6f; beta_1= %10.6f; R2= %10.6f", beta_0, beta_1, R2) ) # # If we want to model: speed ~ stride_rate then x = stride_rate and y = speed # sum_y = 205.4 sum_x = 35.16 sum_y2 = 3880.08 sum_x2 = 112.681 sum_xy = 660.130 beta_1 = ( sum_xy - sum_x * sum_y / n ) / ( sum_x2 - sum_x^2 / n ) beta_0 = sum_y / n - beta_1 * ( sum_x / n ) SST = sum_y2 - sum_y^2 / n SSE = sum_y2 - beta_0 * sum_y - beta_1 * sum_xy R2 = 1 - SSE / SST print( sprintf("beta_0= %10.6f; beta_1= %10.6f; R2= %10.6f", beta_0, beta_1, R2) ) # Ex 12.76: # DF = read.csv( "../../Data/CH12/ex12-76.txt", header=TRUE, quote="'" ) plot( DF$x, DF$y ) m = lm( y ~ x, data=DF ) sm = summary(m) print(sm) n = length(DF$x) alpha = 0.05 t_value = qt( 1 - 0.5 * alpha, n-2 ) sm$coefficients[2,1] + c(-1,+1) * t_value * sm$coefficients[2,2] S_xx = sum( ( DF$x - mean(DF$x) )^2 ) x_new = c( 16, 16, 18, 18, rep(20, 4), rep(22, 4), rep(24, 2), rep(26, 2) ) S_xx_new = sum( ( x_new - mean(x_new) )^2 ) cis = predict( m, newdata=data.frame(x=c(18,22)), interval='confidence', level=0.95 ) pis = predict( m, newdata=data.frame(x=c(18,22)), interval='prediction', level=0.95 ) # Ex 12.77: # DF = read.csv( "../../Data/CH12/ex12-77.txt", header=TRUE, quote="'" ) plot( DF$x, DF$y ) m = lm( y ~ x, data=DF ) sm = summary(m) print(sm) p = predict( m, newdata=data.frame(x=19.1) ) r = 0.68 - p n = length(DF$x) alpha = 0.05 t_value = qt( 1 - 0.5 * alpha, n-2 ) sm$coefficients[2,1] + c(-1,1) * t_value * sm$coefficients[2,2] # Ex 12.78: # n = 15 sum_x = 218.1 sum_y = 1693.6 sum_x2 = 3577.01 sum_y2 = 191672.90 sum_xy = 24252.54 beta_1 = ( sum_xy - sum_x * sum_y / n ) / ( sum_x2 - sum_x^2 / n ) beta_0 = sum_y / n - beta_1 * ( sum_x / n ) SSE = sum_y2 - beta_0 * sum_y - beta_1 * sum_xy s = sqrt( SSE/(n-2) ) x_bar = sum_x / n S_xx = sum_x2 - sum_x^2 / n s_beta_hat_0 = s * sqrt( 1 / n + x_bar^2 / S_xx ) alpha = 0.05 t_value = qt( 1 - 0.5 * alpha, n-2 ) ci = beta_0 + c(-1,+1) * t_value * s_beta_hat_0 # Ex 12.80: Show that the sample correlation change for y vs y^2 # x = seq( 1, 100, length.out=10 ) y = x + rnorm( length(x), 0, 0.1 ) # the same data (with some noise) cor( x, y ) # close to one y = y^2 cor( x, y ) # smaller than one # Ex 12.81 (b) (See Ex. 12.64 for r calculation) # # Ex 12.84: # DF = read.csv( "../../Data/CH12/ex12-84.txt", header=TRUE, quote="'", col.names=c("Temp","Removal") ) plot( DF$Temp, DF$Removal ) m = lm( Removal ~ Temp, data=DF ) sm = summary(m) predict( m, newdata=data.frame(Temp=10.50) ) n = length(DF$Temp) alpha = 0.05 t_value = qt( 1 - 0.5 * alpha, n-2 ) sm$coefficients[2,1] + c(-1,+1) * t_value * sm$coefficients[2,2] # Add the additional data point: # DF_new = data.frame( Temp=c( DF$Temp, 6.53 ), Removal=c( DF$Removal, 96.55 ) ) m_new = lm( Removal ~ Temp, data=DF_new ) sm_new = summary(m_new) # Ex 12.85: # DF = read.csv( "../../Data/CH12/ex12-85.txt", header=TRUE, quote="'" ) plot( DF$x, DF$y ) m = lm( y ~ x, data=DF ) summary(m) abline( m ) # Ex 12.86: # DF = read.csv( "../../Data/CH12/ex12-86.txt", header=TRUE, quote="'" ) plot( DF$BOD, DF$HW ) m = lm( HW ~ BOD, data=DF ) sm = summary(m) abline( m ) n = length(DF$BOD) alpha = 0.05 t_value = qt( 1 - 0.5 * alpha, n-2 ) sm$coefficients[2,1] + c(-1,+1) * t_value * sm$coefficients[2,2] m2 = lm( HW ~ BOD, data=DF ) summary(m2) # Ex 12.87: # # From the previous problem compute the need items: DF = read.csv( "../../Data/CH12/ex12-73.txt", header=TRUE, quote="'" ) n1 = length( DF$x ) SS_x1 = sum( ( DF$x - mean(DF$x) )^2 ) m = lm( y ~ x, data=DF ) beta_hat_0 = m$coefficients[1] beta_hat_1 = m$coefficients[2] SSE_1 = sum( DF$y^2 ) - beta_hat_0 * sum( DF$y ) - beta_hat_1 * sum( DF$x * DF$y ) n2 = 15 SS_x2 = 7152.5578 gamma_hat_1 = 0.006845 SSE_2 = 0.51350 sigma_hat = sqrt( ( SSE_1 + SSE_2 )/ ( n1 + n2 - 4 ) ) t_stat = ( beta_hat_1 - gamma_hat_1 ) / ( sigma_hat * sqrt( 1 / SS_x1 + 1 / SS_x2 ) ) alpha = 0.05 t_value = qt( 1 - 0.5 * alpha, n1 + n2 - 4 )