#
# Epage 
#
# Written by:
# -- 
# John L. Weatherwax                2009-04-21
# 
# email: wax@alum.mit.edu
# 
# Please send comments and especially bug reports to the
# above email address.
# 
#-----

if(is.null(version$language) == FALSE){ 
  require(alr3)
}else{
  library(alr3)
}

data(florida)

postscript("../../WriteUp/Graphics/Chapter9/prob_8_buch_vs_bush.eps", onefile=FALSE, horizontal=FALSE)
plot( florida$Bush, florida$Buchanan, xlab="Bush", ylab="Buchanan" )
m0 <- lm( Buchanan ~ Bush, data=florida )
abline(m0) 
dev.off()

ri <- rstandard( m0 )
ti <- ri * sqrt( ( m0$df - 1 ) / (m0$df-ri^2) ) 

spot <- match( max(abs(ti)), abs(ti) )

florida[spot,]

# test if this point is an outlier: 
#
n <- length(ri)
pprime <- 2
n * 2 * ( 1 - pt(ti[spot], n-pprime-1) )

plot( ti )

ti[spot] <- 0 # zero out this point in our search for the next outlier 
spot <- match( max(abs(ti)), abs(ti) ) # find the next largest value 

florida[spot,]

# use the Bonferroni bound to determine if this is an outlier
# this value is larger than one and so it is not an outlier
# 
n * 2 * ( 1 - pt(ti[spot], n-pprime-1) )


# lets first try to transform our independent variable Bush:
#
attach(florida)
inv.tran.plot(Bush,Buchanan,lambda=c(-1,0,1))
a <- inv.tran.estimate(Bush,Buchanan)
unlist(a)

# this predicts a value ~ 0.7 which visually looks much like the
# value of 1.0 ... but lets transform this data anyway
#
BushT <- powtran( Bush, a$lambda[1] ) 

# lets next try to transform our dependent variable Buchanan:
# 
m0 <- lm( Buchanan ~ BushT )
inv.tran.plot( Buchanan, predict(m0) ) 
a <- inv.tran.estimate( Buchanan, predict(m0) )
unlist(a) 

# this is different enought that lets use its value ~ 0.23 
#
BuchananT <- powtran( Buchanan, a$lambda[1] ) 

# if we want we can look at these three models:
#
plot( Bush, Buchanan )
m1 <- lm( Buchanan ~ Bush )
abline(m1) 
summary( m1 ) 

plot( Bush, BuchananT )
m2 <- lm( BuchananT ~ Bush )
abline(m2) 
summary( m2 ) 

postscript("../../WriteUp/Graphics/Chapter9/prob_8_buchT_vs_bushT.eps", onefile=FALSE, horizontal=FALSE)
plot( BushT, BuchananT )
m3 <- lm( BuchananT ~ BushT )
abline(m3)
dev.off()
summary(m3)

# the model with both transformed variables seems much better in that its R^2 is larger.

ri <- rstandard( m3 )
ti <- ri * sqrt( ( m3$df - 1 ) / (m3$df-ri^2) ) 

spot <- match( max(abs(ti)), abs(ti) )

florida[spot,]

# test if this point is an outlier: 
#
n <- length(ri)
pprime <- 2
n * 2 * ( 1 - pt(ti[spot], n-pprime-1) )

ti[spot] <- 0 # zero out this point in our search for the next outlier 
spot <- match( max(abs(ti)), abs(ti) ) # find the next largest value 

florida[spot,]

# use the Bonferroni bound to determine if this is an outlier
# this value is larger than one and so it is not an outlier
# 
n * 2 * ( 1 - pt(ti[spot], n-pprime-1) )