# # Epage # # Written by: # -- # John L. Weatherwax 2009-04-21 # # email: wax@alum.mit.edu # # Please send comments and especially bug reports to the # above email address. # #----- if(is.null(version$language) == FALSE){ require(alr3) }else{ library(alr3) } data(florida) postscript("../../WriteUp/Graphics/Chapter9/prob_8_buch_vs_bush.eps", onefile=FALSE, horizontal=FALSE) plot( florida$Bush, florida$Buchanan, xlab="Bush", ylab="Buchanan" ) m0 <- lm( Buchanan ~ Bush, data=florida ) abline(m0) dev.off() ri <- rstandard( m0 ) ti <- ri * sqrt( ( m0$df - 1 ) / (m0$df-ri^2) ) spot <- match( max(abs(ti)), abs(ti) ) florida[spot,] # test if this point is an outlier: # n <- length(ri) pprime <- 2 n * 2 * ( 1 - pt(ti[spot], n-pprime-1) ) plot( ti ) ti[spot] <- 0 # zero out this point in our search for the next outlier spot <- match( max(abs(ti)), abs(ti) ) # find the next largest value florida[spot,] # use the Bonferroni bound to determine if this is an outlier # this value is larger than one and so it is not an outlier # n * 2 * ( 1 - pt(ti[spot], n-pprime-1) ) # lets first try to transform our independent variable Bush: # attach(florida) inv.tran.plot(Bush,Buchanan,lambda=c(-1,0,1)) a <- inv.tran.estimate(Bush,Buchanan) unlist(a) # this predicts a value ~ 0.7 which visually looks much like the # value of 1.0 ... but lets transform this data anyway # BushT <- powtran( Bush, a$lambda[1] ) # lets next try to transform our dependent variable Buchanan: # m0 <- lm( Buchanan ~ BushT ) inv.tran.plot( Buchanan, predict(m0) ) a <- inv.tran.estimate( Buchanan, predict(m0) ) unlist(a) # this is different enought that lets use its value ~ 0.23 # BuchananT <- powtran( Buchanan, a$lambda[1] ) # if we want we can look at these three models: # plot( Bush, Buchanan ) m1 <- lm( Buchanan ~ Bush ) abline(m1) summary( m1 ) plot( Bush, BuchananT ) m2 <- lm( BuchananT ~ Bush ) abline(m2) summary( m2 ) postscript("../../WriteUp/Graphics/Chapter9/prob_8_buchT_vs_bushT.eps", onefile=FALSE, horizontal=FALSE) plot( BushT, BuchananT ) m3 <- lm( BuchananT ~ BushT ) abline(m3) dev.off() summary(m3) # the model with both transformed variables seems much better in that its R^2 is larger. ri <- rstandard( m3 ) ti <- ri * sqrt( ( m3$df - 1 ) / (m3$df-ri^2) ) spot <- match( max(abs(ti)), abs(ti) ) florida[spot,] # test if this point is an outlier: # n <- length(ri) pprime <- 2 n * 2 * ( 1 - pt(ti[spot], n-pprime-1) ) ti[spot] <- 0 # zero out this point in our search for the next outlier spot <- match( max(abs(ti)), abs(ti) ) # find the next largest value florida[spot,] # use the Bonferroni bound to determine if this is an outlier # this value is larger than one and so it is not an outlier # n * 2 * ( 1 - pt(ti[spot], n-pprime-1) )