# Written by:
#  --
#  John L. Weatherwax 2009-04-21
#
#  email: wax@alum.mit.edu
#
#  Please send comments and especially bug reports to the
#  above email address.
#
#-----

# Ex 13.26 (EPage 525):
#
# Quadratic fit of bulk density on moisture content; CI for the mean
# response at a fixed x, using the book's reported y-hat (491.10) and
# its estimated standard error (6.52).
DF <- read.csv( "../../Data/CH13/ex13-26.txt", header=TRUE, quote="'" )
plot( DF$moiscont, DF$bulkdens )

n <- 6
k <- 2  # quadratic model => two predictor terms
t_value <- qt( 1-0.01/2, n-k-1 )  # 99% two-sided critical value on n-(k+1) df
491.10 + c(-1,+1) * 6.52 * t_value

# Ex 13.27 (EPage 526):
#
DF <- read.csv( "../../Data/CH13/ex13-27.txt", header=TRUE, quote="'" )
plot( DF$x, DF$y )

# Predicted value and residual at x = 6 from the book's quadratic fit:
x <- 6
y_hat <- 84.482 - 15.875 * x + 1.7679 * x^2
e <- 53 - y_hat

# Coefficient of determination:
SSE <- 61.77
SST <- sum( ( DF$y - mean(DF$y) )^2 )
r2 <- 1 - SSE/SST

# Diagnostics on the book's standardized residuals:
stand_resids <- c( 1.91, -1.95, -0.25, 0.58, 0.90, 0.04, -0.66, 0.2 )
plot( DF$x, stand_resids )
qqnorm( stand_resids )
qqline( stand_resids )

# Confidence interval for the mean response at x = 6 ...
s_yhat <- 1.69
n <- length(DF$x)
k <- 2
t_value <- qt( 1-0.05/2, n-(k+1) )
ci <- y_hat + c(-1,+1) * s_yhat * t_value

# ... and the prediction interval for a new observation there:
MSE <- SSE / (n-(k+1))
s <- sqrt( MSE )
pi <- y_hat + c(-1,+1) * sqrt( s_yhat^2 + s^2 ) * t_value

# Ex 13.28 (EPage 526):
#
n <- 6
y_hat_fn <- function(x){ -113.0937 + 3.3684*x - 0.0178 * x**2 }
y_hat_fn( 75 )
y_hat_fn( 60 )

# SSE via the computational formula sum(y^2) - b0*sum(y) - b1*sum(xy) - b2*sum(x^2 y):
SSE <- 8386.43 - (-113.0937) * 210.7 - (3.3684) * 17002 - (- 0.0178) * 1419780
k <- 2
MSE <- SSE / ( n-(k+1) )
s <- sqrt(MSE)

SST <- 987.35
r2 <- 1 - SSE / SST

# t statistic for the quadratic coefficient:
beta_2_hat <- - 0.0178
s_hat_beta_2 <- 0.00226
t_beta_2 <- beta_2_hat / s_hat_beta_2
pt( t_beta_2, n-(k+1) ) # the probabilty that we could get a value this low or lower by chance

# Ex 13.29 (EPage 526):
#
DF <- read.csv( "../../Data/CH13/ex13-29.txt", header=TRUE, quote="'" )
plot( DF$x, DF$y )

n <- length(DF$x)
y_hat_fn <- function(x){ -295.96 + 2.1885 * x - 0.0031662 * x^2 }
y_hat <- y_hat_fn( DF$x )
DF$y - y_hat  # residuals

SSE <- sum( ( DF$y - y_hat )^2 )
k <- 2
s2 <- SSE/(n-(k+1))
SST <- sum( (DF$y - mean(DF$y))^2 )
r2 <- 1 - SSE/SST

# One-sided p-value for the quadratic coefficient:
s_beta_2_hat <- 0.0004835
t_beta_2 <- - 0.0031662/s_beta_2_hat
pt( t_beta_2, n-(k+1) )

# Bonferroni joint intervals for beta_1 and beta_2: split alpha over the
# two intervals (the second assignment below is the one actually used).
s_beta_1_hat <- 0.4050
alpha <- 0.05/2 # gives 95% combined confidence interval
alpha <- 0.04/2 # gives 96% combined confidence interval
t_value <- qt( 1 - alpha/2, n-(k+1) ) # need 1/2 to get two sided confidence intervals
print( 2.1885 + c(-1,+1) * s_beta_1_hat * t_value )
print( - 0.0031662 + c(-1,+1) * s_beta_2_hat * t_value )

# 95% CI and PI for the response at x = 400:
s_y_hat <- 1.198
t_value <- qt( 1 - 0.05/2, n-(k+1) )
ci <- y_hat_fn( 400 ) + c(-1,+1) * s_y_hat * t_value
pi <- y_hat_fn( 400 ) + c(-1,+1) * sqrt( s2 + s_y_hat^2 ) * t_value

# Ex 13.30 (EPage 526):
#
#DF = read.csv( "../../Data/CH13/ex13-30.txt", header=TRUE, quote="'" ) # this data does not match that given in the problem
n <- 14
y_hat_fn <- function(x){ -251.6 + 1000.1 * x - 135.44 * x^2 }

# 95% CI for the quadratic coefficient:
beta_2_hat <- -135.44
s_beta_2_hat <- 41.97
k <- 2
t_value <- qt( 1-0.05/2, n-(k+1) )
ci <- beta_2_hat + c(-1,+1) * s_beta_2_hat * t_value

# Test whether the mean response at x = 2.5 differs from 1500:
s_y_hat <- 53.5
y_hat <- y_hat_fn( 2.5 )
t_value <- ( y_hat - 1500 )/s_y_hat
qt( 0.05, n-(k+1) )  # lower-tail critical value to compare against

# Prediction interval at x = 2.5:
t_value <- qt( 1-0.05/2, n-(k+1) )
pi <- y_hat_fn( 2.5 ) + c(-1,+1) * s_y_hat * t_value

# Ex 13.31 (EPage 527/Anwser EPage 706):
#
#DF = read.csv( "../../Data/CH13/ex13-31.txt", header=TRUE, quote="'" ) # this data does not match that given in the problem
DF <- data.frame( x=c(1, 1, 2, 4, 4, 4, 6),
                  y=c(23., 24.5, 28., 30.9, 32.0, 33.6, 20.0) )
plot( DF$x, DF$y )

m <- lm( y ~ x + I(x^2), data=DF )
sm <- summary(m)

# the predicted values and residuals:
y_hat <- m$fitted.values
residuals <- m$residuals
plot( DF$x, residuals )

# Standardized residuals: residual i has variance s^2 - s_{yhat_i}^2,
# with the per-point s_{yhat_i} values taken from the book.
sigma_y_hats <- c(0.955,0.955,0.712,0.777,0.777,0.777,1.407)
var_residuals <- sm$sigma^2 - sigma_y_hats^2
std_residuals <- sqrt( var_residuals )
standardized_residuals <- residuals / std_residuals
plot( DF$x, standardized_residuals )
standardized_residuals
residuals/sm$sigma  # compare with the crude e_i / s scaling

# Prediction interval at x = 4:
n <- length(DF$x)
k <- 2
t_value <- qt( 1 - 0.05/2, n-(k+1) )
y_hat_sigma <- 0.777
pi <- predict( m, newdata=data.frame( x=4 ) ) +
      c(-1,+1) * sqrt( y_hat_sigma^2 + sm$sigma^2 ) * t_value

# Ex 13.32 (EPage 527/Anwser EPage 706):
#
DF <- read.csv( "../../Data/CH13/ex13-32.txt", header=TRUE, quote="'" )
n <- length(DF$x)
plot( DF$x, DF$y )
# estimate the mean of x and subtract it from each sample:
# (Ex 13.32 continued) Fit a cubic in the centered predictor x' = x - x_bar:
x_bar <- mean(DF$x)
DF$xp <- DF$x - x_bar # x prime
m <- lm( y ~ xp + I(xp^2) + I(xp^3), data=DF )
sm <- summary(m)

# Convert a centered-model coefficient back to the uncentered scale:
( -3*x_bar ) * (-2.3968) + 2.3964

# Prediction at x = 4.5 (centered before being passed to the model):
predict( m, newdata=data.frame( xp=(4.5-x_bar) ) )

# Ex 13.33 (EPage 527/Anwser EPage 706):
#
DF <- read.csv( "../../Data/CH13/ex13-33.txt", header=TRUE, quote="'" )
n <- length(DF$x)

# Standardize using the *population* standard deviation (divide by n, not n-1),
# matching the book's standardization for this exercise:
x_mean <- mean(DF$x)
x_std <- sqrt( (n-1) * sd(DF$x)^2 / n )

y_hat_fn <- function(x){ 0.9671 - 0.0502 * x - 0.0176 * x^2 + 0.0062 * x^3 }
# Bug fix: the original evaluated these at (20-x_bar)/x_std and (25-x_bar)/x_std,
# but x_bar was a stale mean left over from Ex 13.32's data; x_mean (defined
# above for the ex13-33 data, and otherwise unused) is what was intended.
y_hat_fn( (20-x_mean)/x_std )
y_hat_fn( (25-x_mean)/x_std )

# t test of the cubic coefficient (7 points, 4 estimated parameters):
t_value <- 0.0062 / 0.0031
qt( 1-0.05/2, 7-4 )

# R^2 for the cubic and quadratic models:
SST <- sum( ( DF$y - mean(DF$y) )^2 )
SSE_3 <- 0.000063
R2_3 <- 1 - SSE_3/SST # R^2 for cubic model
SSE_2 <- 0.00014367
R2_2 <- 1 - SSE_2/SST # R^2 for the quadradic model

# Compute the adjusted R^2 for each model:
# (algebraically equivalent to 1 - (1-R^2)(n-1)/(n-1-k))
k <- 3
R2_adj_3 <- ((n-1)*R2_3 - k)/(n-1-k) # the cubic model
k <- 2
R2_adj_2 <- ((n-1)*R2_2 - k)/(n-1-k) # the quadradic model

# Ex 13.34 (EPage 528/Anwser EPage 706):
#
DF <- read.csv( "../../Data/CH13/ex13-34.txt", header=TRUE, quote="'" )
n <- length(DF$x)

# Standardize the predictor (sample standard deviation here, unlike Ex 13.33):
x_bar <- mean(DF$x)
s_x <- sd(DF$x)
DF$xp <- ( DF$x - x_bar ) / s_x

# given in the book
beta_0 <- 0.8733
beta_1 <- -0.3255
beta_2 <- 0.0448
y_hat_fn <- function(x){ beta_0 + beta_1 * x + beta_2 * x^2 }
y_hat_fn( ( 50-x_bar ) / s_x )

# R^2 via the computational formula for SSE:
SST <- sum( ( DF$y - mean(DF$y) )^2 )
SSE <- sum( DF$y^2 ) - beta_0 * sum( DF$y ) -
       beta_1 * sum( DF$y * DF$xp ) - beta_2 * sum( DF$y * DF$xp^2 )
R2 <- 1 - SSE/SST

# t test of the quadratic coefficient:
t_value <- beta_2 / 0.0319
k <- 2
qt( 1-0.05/2, n-(k+1) )

# Ex 13.35 (EPage 527/Anwser EPage 706):
#
# NOTE(review): this reads ex13-34.txt — presumably Ex 13.35 reuses the
# Ex 13.34 data with a log-transformed response; confirm against the text.
DF <- read.csv( "../../Data/CH13/ex13-34.txt", header=TRUE, quote="'" )
n <- length(DF$x)
DF$ly <- log(DF$y)
m <- lm( ly ~ x + I(x^2), data=DF )
sm <- summary(m)
sm