source('../../Data/data_loaders.R') DF = load_exercise_3_7_data() # Part (a): # m = lm( P ~ AGE, data=DF ) # Lets plot the data and models: # #postscript("../../WriteUp/Graphics/Chapter3/ex_3_7_part_a.eps", onefile=FALSE, horizontal=FALSE) par(mfrow=c(1,3)) plot( m$fitted.values, m$residuals, type='p', pch=19, cex=1.5, xlab='y_hat', ylab='residual' ) grid() plot(m, which=2) grid() plot(m$fitted.values, DF$P, xlab='y_hat', ylab='y') grid() par(mfrow=c(1,1)) #dev.off() # Part (b): # library(MASS) #postscript("../../WriteUp/Graphics/Chapter3/ex_3_7_part_b.eps", onefile=FALSE, horizontal=FALSE) bc = boxcox(m) #dev.off() # What lambda value gives the largest log-likelihood: # print(sprintf("Boxcox lambda estimatate = %10.6f", bc$x[ which.max(bc$y) ])) # Lets try Atkinsons method: # source('utils.R') atk_res = Atkinsons( DF, 'P', c( 'AGE' ) ) print(sprintf("Atkinsons lambda estimate: %10.6f (t-value %10.6f)", atk_res$lambda_hat, atk_res$gamma_t)) # Part (c): # DF$LOGP = log( DF$P ) m_log = lm( LOGP ~ AGE, data=DF ) DF$one_over_sqrt_P = 1/sqrt( DF$P ) m_one_over = lm( one_over_sqrt_P ~ AGE, data=DF ) # Make some predictions with different models: p1 = predict( m, newdata=data.frame( AGE=2 ), level=0.95, interval="prediction" ) p = predict( m_log, newdata=data.frame( AGE=2 ), level=0.95, interval="prediction" ) p2 = exp( p ) p = predict( m_one_over, newdata=data.frame( AGE=2 ), level=0.95, interval="prediction" ) p3 = 1/(p^2) t = p3[2]; p3[2] = p3[3]; p3[3] = t; # swap upper and lower bounds # Put everything together: # R = rbind( p1, p2, p3 ) rownames(R) = 1:dim(R)[1] R = data.frame( R ) R$pi_range = R$upr - R$lwr print( R ) # Plot the data and the three models: # #postscript("../../WriteUp/Graphics/Chapter3/ex_3_7_part_c.eps", onefile=FALSE, horizontal=FALSE) plot( DF$AGE, DF$P, type='p', pch=19, cex=1.5, xlab='AGE', ylab='P' ) all_ages = unique( DF$AGE ) points( all_ages, predict( m, newdata=data.frame( AGE=all_ages ) ), type='l', col='blue' ) points( all_ages, exp( predict( m_log, newdata=data.frame( AGE=all_ages ) ) ), type='l', col='purple' ) p = predict( m_one_over, newdata=data.frame( AGE=all_ages ) ) points( all_ages, 1/(p^2), type='l', col='green' ) legend( 'topright', c('P = beta_0 + beta_1 AGE', 'P = exp(beta_0 + beta_1 AGE)', 'P=1/sqrt(beta_0 + beta_1 AGE)'), col=c('blue', 'purple', 'green'), lty=c(1, 1, 1)) grid() #dev.off()