# # EPage 562 # # Written by: # -- # John L. Weatherwax 2009-04-21 # # email: wax@alum.mit.edu # # Please send comments and especially bug reports to the # above email address. # #----- # Ex 13.65 (EPage 562): # DF = read.csv( "../../Data/CH13/ex13-65.txt", header=TRUE, quote="'" ) DF$Obs = NULL DF$crack_appeared = 1 DF[1:18,]$crack_appeared = 0 #postscript("../../WriteUp/Graphics/Chapter13/chap_13_ex_65_boxplots.eps", onefile=FALSE, horizontal=FALSE) boxplot( ppv ~ crack_appeared, data=DF, xlab='crack appeared (0=False;1=True)', ylab='ppv' ) #dev.off() mu_crack = mean( DF[ DF$crack_appeared==1, ]$ppv ) var_crack = var( DF[ DF$crack_appeared==1, ]$ppv ) n_crack = sum( DF$crack_appeared==1 ) mu_no_crack = mean( DF[ DF$crack_appeared==0, ]$ppv ) var_no_crack = var( DF[ DF$crack_appeared==0, ]$ppv ) n_no_crack = sum( DF$crack_appeared==0 ) mu_diff = mu_crack - mu_no_crack mu_diff_se = sqrt( var_crack / n_crack + var_no_crack / n_no_crack ) z_value = qnorm( 1 - 0.05/2 ) ci = ( mu_crack - mu_no_crack ) + c(-1,+1) * mu_diff_se * z_value plot( DF$ppv, DF$Ratio ) m = lm( Ratio ~ ppv, data=DF ) abline(m) summary(m) # plot( m$fitted.values, rstandard(m) ) # Ex 13.66 (EPage 562): # y_hat_fn = function(invthick) { -0.398 + 0.26 * invthick } y_hat = y_hat_fn( 23.5 ) y_hat = y_hat_fn( 45 ) y_hat_se = 0.253 n = 8 k = 1 t_value = qt( 1-0.05/2, n-(k+1) ) y_hat + c(-1,+1) * y_hat_se * t_value # Ex 13.67 (EPage 562): # y_hat_fn = function(x1,x2,x3,x4) { 3.5959 + 0.6566 * x1 + 0.0096 * x2 - 0.0996 * x3 - 0.008 * x4 } y_hat = y_hat_fn( 1, 170, 11, 140 ) 3.15 - y_hat SSE = 30.1033 SST = 102.3922 R2 = 1 - SSE/SST n = 20 k = 4 numer = R2/k denom = (1-R2)/(n-(k+1)) f = numer / denom 1 - pf( f, k, n-(k+1) ) # Ex 13.68 (EPage 564): # DF = read.csv( "../../Data/CH13/ex13-68.txt", header=TRUE, quote="'" ) plot( DF$Log.edge, DF$Log.time ) m = lm( Log.time ~ Log.edge, data=DF ) abline(m) log_time = predict( m, newdata=data.frame(Log.edge=log(300)) ) time = exp(log_time) # Ex 13.69 (EPage 564): # DF = read.csv( "../../Data/CH13/ex13-69.txt", header=TRUE, quote="'" ) plot( DF$Pressure, DF$Temperature ) plot( log(DF$Pressure), DF$Temperature ) # this looks very linear DF$lPressure = log(DF$Pressure) m = lm( Temperature ~ lPressure, data=DF ) abline(m) summary(m) plot( m$fitted.values, rstandard(m) ) predict( m, newdata=data.frame(lPressure=log(200)), interval='prediction', level=0.95 ) # Try a polynomial model: # plot( DF$Pressure, DF$Temperature ) m = lm( Temperature ~ Pressure + I(Pressure^2), data=DF ) summary(m) plot( m$fitted.values, rstandard(m) ) # this looks fine to me ... m = lm( Temperature ~ Pressure + I(Pressure^2) + I(Pressure^3), data=DF ) summary(m) plot( m$fitted.values, rstandard(m) ) # Ex 13.70 (EPage 564): # DF = read.csv( "../../Data/CH13/ex13-70.txt", header=TRUE, quote="'" ) n = length(DF$y) k = 3 # with interaction term (i.e. full model) l = 2 # dropping interaction term (i.e. reduced model) SST = 8.55 SSE_reduced = 5.18 SSE_full_model = 3.07 R2_full_model = 1 - SSE_full_model / SST R2_reduced = 1 - SSE_reduced / SST # For the reduced model: numer = R2_reduced / l denom = ( 1 - R2_reduced ) / ( n - (l+1) ) f = numer / denom print(f) p_value = 1 - pf( f, l, n-(l+1) ) print(p_value) # For the full model: numer = R2_full_model / k denom = ( 1 - R2_full_model ) / ( n - (k+1) ) f = numer / denom print(f) p_value = 1 - pf( f, k, n-(k+1) ) print(p_value) # Ex 13.71 (EPage 564): # DF = read.csv( "../../Data/CH13/ex13-71.txt", header=TRUE, quote="'" ) DF$Obs = NULL m_1 = lm( pallcont ~ pdconc + niconc + pH + temp + currdens, data=DF ) sm_1 = summary(m_1) plot( m_1$fitted.values, rstandard(m_1) ) # one point has a standardized residual > 3 m_2 = lm( pallcont ~ pdconc + niconc + pH + temp + currdens + I(pdconc^2) + I(niconc^2) + I(pH^2) + I(temp^2) + I(currdens^2) + I(pdconc*niconc) + I(pdconc*pH) + I(pdconc*temp) + I(pdconc*currdens) + I(niconc*pH) + I(niconc*temp) + I(niconc*currdens) + I(pH*temp) + I(pH*currdens) + I(temp*currdens), data=DF ) sm_2 = summary(m_2) SST = sum( ( DF$pallcont - mean(DF$pallcont) )^2 ) # Perform a model utility test: n = length(DF$pallcont) k = 5 + choose(5,2) + 5 # the full model l = 5 # the linear model R2_full_model = sm_2$r.squared SSE_full_model = ( 1 - R2_full_model ) * SST R2_reduced_model = sm_1$r.squared SSE_reduced_model = ( 1 - R2_reduced_model ) * SST numer = ( SSE_reduced_model - SSE_full_model ) / (k-l ) denom = SSE_full_model / (n-(k+1)) f = numer / denom print(f) p_value = 1 - pf( f, k-l, n-(k+1) ) print(p_value) m_3 = lm( pallcont ~ pdconc + niconc + pH + temp + currdens + I(pH^2), data=DF ) sm_3 = summary(m_3) # Ex 13.72 (EPage 565): # n = 37 k = 3 + choose(3,2) + 3 # linear terms + interaction terms + quadradic terms SST = 16.18555 SSE = 0.80017 R2 = 1 - SSE/SST numer = R2/k denom = (1-R2)/( n-(k+1) ) f = numer / denom print(f) p_value = 1 - pf( f, k, n-(k+1) ) print(p_value) t_value = sqrt( 2.32 ) p_value = 2 * ( 1 - pt( t_value, n-(k+1) ) ) y_hat_fn = function(wc,wt,ef) { 3.352 + 0.098 * wc + 0.222 * wt + 0.297 * ef - 0.0102 * wt^2 - 0.037 * ef^2 + 0.0128 * wc * wt } y_hat = y_hat_fn(10,12,6) y_hat_se = 0.075 # keep the old SSE value but update the value of k k = 6 sigma2 = SSE / (n-(k+1)) t_value = qt( 1 - 0.05/2, n-(k+1) ) pi = y_hat + c(-1,+1) * sqrt( y_hat_se^2 + sigma2 ) * t_value # Ex 13.73 (EPage 565): # DF = read.csv( "../../Data/CH13/ex13-73.txt", header=TRUE, quote="'" ) n = length(DF$x) k = 2 SST = 202.88 SSE = 0.29 R2 = 1 - SSE/SST # Compute the f statistic needed for the model utilty test: numer = R2/k denom = (1-R2)/(n-(k+1)) f = numer/denom print(f) p_value = 1 - pf( f, k, n-(k+1) ) print(p_value) t_beta_2 = -0.00163141 / 0.00003391 p_value = 2 * ( 1 - pt( -t_beta_2, n-(k+1) ) ) y_hat_fn = function(x) { -1.5127 + 0.391901 * x - 0.00163141 * x^2 } y_hat = y_hat_fn(100) y_hat_se = 0.1141 t_value = qt( 1 - 0.05/2, n-(k+1) ) ci = y_hat + c(-1,+1) * y_hat_se * t_value sigma2 = SSE / ( n - (k+1) ) pi = y_hat + c(-1,+1) * sqrt( y_hat_se^2 + sigma2 ) * t_value # Ex 13.74 (EPage 565): # DF = data.frame( x=c(19,20,24,27,29,30,31,39,40,43,45,50), ly=c(-8,-7.1,-7.2,-6.7,-6.2,-6.8,-5.8,-5.3,-6.0,-4.7,-5.4,-5.1) ) plot( DF$x, DF$ly ) m = lm( ly ~ x, data=DF ) abline(m) summary(m) log_10_y_hat = predict( m, newdata=data.frame(x=35) ) # prediction of log_10(y) 10^(log_10_y_hat) # Ex 13.75 (EPage 565): # DF = read.csv( "../../Data/CH13/ex13-75.txt", header=TRUE, quote="'" ) n = length(DF$x) t_beta_2 = -2.3621 / 0.3073 f = t_beta_2^2 k = 2 1 - pf( f, 1, n-(k+1) ) y_hat = 41.7422 + 6.581 - 2.3621 y_hat_se = 1.031 t_value = qt( 1 - 0.05/2, n-(k+1) ) ci = y_hat + c(-1,+1) * y_hat_se * t_value # Ex 13.76 (EPage 565): # DF = read.csv( "../../Data/CH13/ex13-76.txt", header=TRUE, quote="'" ) n = length(DF$y) k = 2 beta_1 = 0.14167 beta_1_se = 0.03301 t_value = qt( 1 - 0.05/2, n-(k+1) ) ci = beta_1 + c(-1,+1) * beta_1_se * t_value y_hat_fn = function(naoh,time) { 6.05 + 0.142 * naoh - 0.0169 * time } y_hat = y_hat_fn(9,60) y_hat_se = 0.162 t_value = qt( 1 - 0.05/2, n-(k+1) ) sigma2 = 0.4851^2 pi = y_hat + c(-1,+1) * sqrt( y_hat_se^2 + sigma2 ) * t_value # Ex 13.77 (EPage 566): # y_hat_fn = function(x1,x2) { 180.0 + 1.0 * x1 + 10.5 * x2 } y_hat = y_hat_fn(15,3.5) SST = 1210.30 SSE = 117.40 R2 = 1 - SSE / SST n = 12 k = 2 numer = R2/k denom = (1-R2)/(n-(k+1)) f = numer/denom 1 - pf( f, k, n-(k+1) ) y_hat = y_hat_fn(18,3) y_hat_se = 1.20 sigma2 = SSE/(n-(k+1)) t_value = qt( 1 - 0.05/2, n-(k+1) ) pi = y_hat + c(-1,+1) * y_hat_se * t_value # Ex 13.78 (EPage 566): # n = 20 SSE_linear_fit = 8212.5; l = 3 SSE_quadradic_fit = 5027.1 ; k = 3 + choose(3,2) + 3 numer = ( SSE_linear_fit - SSE_quadradic_fit )/(k-l) denom = SSE_quadradic_fit / (n-(k+1)) f = numer / denom print( f ) p_value = 1 - pf( f, k-l, n-(k+1) ) print( p_value ) # Ex 13.79 (EPage 566): # R2_k = c(0.354,0.453,0.511,0.550,0.562,0.570,0.572,0.575,0.575) plot( 1:length(R2_k), R2_k ) MSE_k = c(2295,1948,1742,1607,1566,1541,1535,1530,1532) plot( 1:length(MSE_k), MSE_k ) C_k = c(314,173,89.6,35.7,19.9,11.0,9.4,8.2,10.0) plot( 1:length(C_k), C_k ) R2_k = c(0.415,0.541,0.6,0.629,0.65,0.652,0.655,0.658,0.659,0.659) plot( 1:length(R2_k), R2_k ) MSE_k = c(2079,1636,1427,1324,1251,1246,1237,1229,1229,1230) plot( 1:length(MSE_k), MSE_k ) C_k = c(433,109,104,52.4,16.5,14.9,11.2,8.5,9.2,11) plot( 1:length(C_k), C_k ) # Ex 13.80 (EPage 566): # n = 20 k = 15 R2 = 0.9 numer = R2/k denom = (1-R2)/(n-(k+1)) f = numer/denom 1 - pf( f, k, n-(k+1) ) # We now want to find the value of R2 such that 1 - pf( f(R2), k, n-(k+1) ) < 0.05 : # R2s = seq( from=0.9, to=0.99, length.out=100 ) R2_to_p_value = function(R2) { numer = R2/k denom = (1-R2)/(n-(k+1)) f = numer/denom 1 - pf( f, k, n-(k+1) ) } p_values = sapply( R2s, R2_to_p_value ) plot( R2s, p_values, xlab='R2 value', ylab='p-value' ) abline(h=0.05) grid() # Ex 13.81 (EPage 567): # n = 117 k = 5 R2 = 0.827 numer = R2/k denom = (1-R2)/(n-(k+1)) f = numer/denom 1 - pf( f, k, n-(k+1) ) beta_1_hat = 0.041 beta_1_se = 0.016 t_value = qt( 1 - 0.1/2, n-(k+1) ) ci = beta_1_hat + c(-1,+1) * beta_1_se * t_value beta_4_hat = 0.041 beta_4_se = 0.007 t_value = beta_4_hat / beta_4_se 2 * ( 1 - pt( t_value, n-(k+1) ) ) y_hat_fn = function(x1,x2,x3,x4,x5) { 19.607 + 0.041 * x1 + 0.071 * x2+ 0.001 * x3 + 0.041 * x4 + 0.687 * x5 } y_hat = y_hat_fn(166,60,788,68,95) y_actual = 103