source('../../Data/data_loaders.R') DF = load_appendix_cigarettes_data() # Part (a): # m = lm( CAR ~ NIC, data=DF ) #postscript("../../WriteUp/Graphics/Chapter2/ex_2_25_data_N_model.eps", onefile=FALSE, horizontal=FALSE) plot( DF$NIC, DF$CAR, type='p', xlab='NIC', ylab='CAR' ) abline(m) wm = 3 # we will want to consider this case in detail points( DF$NIC[wm], DF$CAR[wm], pch=19, cex=1.5 ) grid() #dev.off() print( summary(m) ) print( anova(m) ) print( sprintf( "correlation between CAR and NIC= %10.6f; linear model R2: %10.6f", cor( DF$CAR, DF$NIC ), summary(m)$r.squared ) ) # Part (b): # #postscript("../../WriteUp/Graphics/Chapter2/ex_2_25_residual_vs_fitted.eps", onefile=FALSE, horizontal=FALSE) plot( m, which=1 ) #dev.off() library(stats) standardized_res = rstandard(m) studentized_res = rstudent(m) print( which.max( abs(standardized_res) ) ) print( which.max( abs(studentized_res) ) ) # Note case 3 on these plots: plot( standardized_res ) plot( studentized_res ) # Compute cooks distance (from stats): # cd = cooks.distance(m) plot( cd, xlab='index', ylab='cooks distance' ) grid() wm = 3 # we want to consider this case print( sprintf("For case %d: standardized= %10.6f; studentized= %10.6f; cooks= %10.6f", wm, standardized_res[wm], studentized_res[wm], cd[wm]) ) # Lets refit with this case excluded: # m2 = lm( CAR ~ NIC, data=DF[-3,] ) print(summary(m2))