source('../../Data/data_loaders.R') DF = load_appendix_survival_data() # Part (a): # m = lm( TIME ~ LIV, data=DF ) #postscript("../../WriteUp/Graphics/Chapter2/ex_2_24_data_N_model.eps", onefile=FALSE, horizontal=FALSE) plot( DF$LIV, DF$TIME, type='p', xlab='LIV', ylab='TIME' ) abline(m) wm = 13 # we will want to consider this case in detail points( DF$LIV[wm], DF$TIME[wm], pch=19, cex=1.5 ) grid() #dev.off() s = summary(m) print(s) # Part (b): # print(anova(m)) # Part (c): # y_hat = m$fitted.values #postscript("../../WriteUp/Graphics/Chapter2/ex_2_24_residual_plots.eps", onefile=FALSE, horizontal=FALSE) par(mfrow=c(1,2)) plot( y_hat, m$residuals, type='p', pch=19, xlab='y_hat', ylab='residuals' ) grid() plot( DF$TIME, m$residuals, type='p', pch=19, xlab='y=TIME', ylab='residuals' ) grid() par(mfrow=c(1,1)) #dev.off() print( sprintf("corr[r, y_hat]= %5.3f; corr[r, y]: %5.3f; R^2 = %5.3f; sqrt(1-R^2)= %5.3f", cor(m$residuals, y_hat), cor(m$residuals, DF$TIME), s$r.squared, sqrt(1-s$r.squared)) ) # Part (d): # wm = which.max(abs(m$residuals)) print( sprintf("Largest abs residual at location: %d with value %10.6f", wm, m$residuals[wm]) ) # Get the standardized and studentized residuals (using MASS): # library(MASS) standardized_res = stdres(m) studentized_res = studres(m) # Get the standardized and studentized residuals (using stats): # library(stats) standardized_res = rstandard(m) studentized_res = rstudent(m) # Notice that case 13 is a huge outlier in these plots: plot( standardized_res ) plot( studentized_res ) # Compute cooks distance (from stats): # cd = cooks.distance(m) plot( cd, xlab='index', ylab='cooks distance' ) grid() print( sprintf("For case %d: standardized= %8.6f; studentized= %8.6f; cooks= %8.6f", wm, standardized_res[wm], studentized_res[wm], cd[wm]) ) # Part (e): # #postscript("../../WriteUp/Graphics/Chapter2/ex_2_24_qq_plot.eps", onefile=FALSE, horizontal=FALSE) plot( m, which=2 ) #dev.off() # Part (f): Predict response for case 13: # print( predict( m, newdata=data.frame( LIV=3.95 ), interval='confidence', level=0.95 ) ) print( predict( m, newdata=data.frame( LIV=3.95 ), interval='prediction', level=0.95 ) )