#
# Written by:
# --
# John L. Weatherwax                2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
#-----
#
# EPage 140
#
# Fits a linear model for percent body fat (PBF) on the bodyfat data set,
# prunes predictors with backwards stepwise selection, and then inspects
# influential samples (Cook's distance) and outliers (residuals vs. fitted).
#

# Set to TRUE to write the diagnostic plots to EPS files instead of the
# screen device.
save_plots <- FALSE

# Plot the kernel density of `values` next to the density of `log(values)`
# so the effect of a log transformation on skewness can be judged by eye.
#
# values: numeric vector (must be positive for the log to be defined)
# label:  name used in the two panel titles
#
# The previous graphical parameters are restored on exit, even on error.
plot_density_pair <- function(values, label) {
  old_par <- par(mfrow = c(1, 2))
  on.exit(par(old_par), add = TRUE)
  plot(density(values), main = label)
  plot(density(log(values)), main = paste0("log(", label, ")"))
  invisible(NULL)
}

# Draw one of the standard lm diagnostic plots for `model`.
#
# model:    a fitted lm object
# which:    the plot.lm panel to draw (1 = residuals vs. fitted,
#           4 = Cook's distance)
# eps_file: path of the EPS file written when `save` is TRUE
# save:     if TRUE plot to `eps_file`, otherwise to the current device
#
# The postscript device is closed via on.exit() so it is not leaked if the
# plot call fails.
plot_lm_diagnostic <- function(model, which, eps_file, save = save_plots) {
  if (save) {
    postscript(eps_file, onefile = FALSE, horizontal = FALSE)
    on.exit(dev.off(), add = TRUE)
  }
  plot(model, which = which)
  invisible(NULL)
}

DF <- read.csv("../../Data/bodyfat.csv", header = TRUE)

# Let's look for the most significant predictors
# (Density is excluded from the set of predictors):
#
m <- lm(PBF ~ . - Density, data = DF)
summary(m)

# Look for transformations of the top predictors that will make them more normal:
#
# Abdomen_Circ: skewed to the right; the log is slightly better (i.e. less skewed)
plot_density_pair(DF$Abdomen_Circ, "Abdomen_Circ")

# Wrist_Circ: skewed to the right; the log makes not much difference
plot_density_pair(DF$Wrist_Circ, "Wrist_Circ")

# Let's use backwards stepwise regression to remove predictors:
#
sm <- step(m)
summary(sm)

# Remove Hip_Circ:
#
sm1 <- update(sm, . ~ . - Hip_Circ)
summary(sm1)

# Can/should we remove Neck_Circ?
sm2 <- update(sm1, . ~ . - Neck_Circ)
summary(sm2)

# Can/should we remove Thigh_Circ?
# NOTE(review): this is updated from sm1 (not sm2), i.e. each candidate
# removal appears to be tested on its own against sm1 -- confirm intent.
sm3 <- update(sm1, . ~ . - Thigh_Circ)
summary(sm3)

# Look at Cook's D to find the samples that have undue influence on the
# linear model's coefficients:
#
plot_lm_diagnostic(
  sm1, which = 4,
  eps_file = "../../WriteUp/Graphics/Chapter6/pbf_plot_which_4.eps"
)

# Print the samples that had the most extreme values:
#
feats <- c(
  "PBF", "Age", "Weight", "Neck_Circ", "Abdomen_Circ",
  "Thigh_Circ", "Forearm_Circ", "Wrist_Circ"
)
summary(DF[, feats])
# Row indices are hard-coded; presumably read off the Cook's distance plot
# above -- verify if the data file changes.
DF[c(65, 139, 193), feats]

# Look for outliers to our model:
#
plot_lm_diagnostic(
  sm1, which = 1,
  eps_file = "../../WriteUp/Graphics/Chapter6/pbf_plot_which_1.eps"
)
# Row indices are hard-coded; presumably the points labelled in the
# residuals vs. fitted plot above -- verify if the data file changes.
DF[c(133, 136, 201), feats]