#
# Epage 164 
#
# Written by:
# -- 
# John L. Weatherwax                2009-04-21
# 
# email: wax@alum.mit.edu
# 
# Please send comments and especially bug reports to the
# above email address.
# 
#-----

# version$language == "R" for R version$language == NULL for SPlus

if(is.null(version$language) == FALSE){ 
  require(alr3)
}else{
  library(alr3)
}

data(BigMac2003)
attach(BigMac2003)

# 7.5.1 (all data points):
# 
postscript("../../WriteUp/Graphics/Chapter7/prob_5_orig_scatter_plot.eps", onefile=FALSE, horizontal=FALSE)
plot( FoodIndex, BigMac )
dev.off()

m0 <- lm( FoodIndex ~ BigMac )
s0 <- summary(m0) 

# look for a transformation of the dependent variable FoodIndex:
#
inv.tran.plot( FoodIndex, BigMac, lam=c(-1,0,+1) ) 

unlist( inv.tran.estimate(FoodIndex,BigMac) ) 

foodIndexT <- powtran( FoodIndex, 0, family="box.cox",modified=TRUE) 

# try to find a transformation of the response "BigMac"
#
m1 <- lm( BigMac ~ foodIndexT )
s1 <- summary(m1)

postscript("../../WriteUp/Graphics/Chapter7/prob_5_inv_trans_plot.eps", onefile=FALSE, horizontal=FALSE)
ans <- inv.tran.plot( BigMac, predict(m1), lam=c(-1,0,1), key=c(145,20) )
dev.off() 

unlist( inv.tran.estimate(BigMac, predict(m1)) )

bigMacT <- log(BigMac)

# fit this log-log nonlinear model:
# 
m2 <- lm( bigMacT ~ foodIndexT )
s2 <- summary(m2)

postscript("../../WriteUp/Graphics/Chapter7/prob_5_log_foodIndex_log_bigMac.eps", onefile=FALSE, horizontal=FALSE)
plot(foodIndexT,bigMacT)
abline(m2)
dev.off()

# compare the fit for each model: 
#
print( c( s0$r.squared, s1$r.squared, s2$r.squared ) )


# 7.5.2 (all data points):
# 
postscript("../../WriteUp/Graphics/Chapter7/prob_5_orig_scatter_plot_matrix.eps", onefile=FALSE, horizontal=FALSE)
pairs( ~Rice+Bread+BigMac )
dev.off() 

ans <- bctrans(BigMac~Rice+Bread,data=BigMac2003)
summary(ans)

postscript("../../WriteUp/Graphics/Chapter7/prob_5_trans_scatter_plot_matrix.eps", onefile=FALSE, horizontal=FALSE)
plot(ans) 
dev.off()

# for the next part of this problem lets estimate the optimal powers for the suggested variables:
# 

# 7.5.3 the inverse fitted value plot (all data points)
#
b <- BigMac2003
b$logBread <- log(Bread)
b$logBus <- log(Bus)
b$logTeachGI <- log(TeachGI)
b$AptToPower <- Apt**0.33

m3 <- lm( BigMac ~ logBread+logBus+logTeachGI+AptToPower, data=b ) # the direct linear regression of BigMac on these variables 
s3 <- summary(m3)

postscript("../../WriteUp/Graphics/Chapter7/prob_5_3_inv_trans_plot.eps", onefile=FALSE, horizontal=FALSE)
ans <- inv.tran.plot( BigMac, predict(m3), lam=c(-1,0,1), key=c(140,20) ) 
dev.off() 

# based on the standard error ... 0 is not unreasnable ... but lets take the estimated value 
unlist( inv.tran.estimate(BigMac, predict(m3)) )

optLambda <- ans$lambda[1] 

# lets see how well this does when we fit with this optimal 
#
b$BigMacToPower <- b$BigMac**(optLambda) 
m4 <- lm( BigMacToPower ~ logBread+logBus+logTeachGI+AptToPower, data=b )
s4 <- summary(m4)

print( c(s3$r.squared,s4$r.squared) ) 

# plot the new inverse response plot ... something is not correct about what I'm doing here ... 
#
#postscript("../../WriteUp/Graphics/Chapter7/prob_5_3_inv_trans_plot_second_time.eps", onefile=FALSE, horizontal=FALSE)
#inv.tran.plot( BigMac, predict(m4)**(1/optLambda) )
#plot( BigMac, predict(m4)**(1/optLambda) )
#dev.off() 


# find the largest two indexes (if we want to remove them):
#
# These two cities are: Karachi & Nairobi
# 
inds1 <- BigMac2003$BigMac==132
inds2 <- BigMac2003$BigMac==185
bigInds <- inds1 | inds2 
BigMac2003$City[bigInds]

# Lets delete the outliers:
a <- data.frame(
                City       = BigMac2003$City[-bigInds],
                BigMac     = BigMac2003$BigMac[-bigInds],
                Bread      = BigMac2003$Bread[-bigInds],
                Rice       = BigMac2003$Rice[-bigInds],
                FoodIndex  = BigMac2003$FoodIndex[-bigInds],
                Bus        = BigMac2003$Bus[-bigInds],
                Apt        = BigMac2003$Apt[-bigInds],
                TeachGI    = BigMac2003$TeachGI[-bigInds],
                TeachNI    = BigMac2003$TeachNI[-bigInds],
                TaxRate    = BigMac2003$TaxRate[-bigInds],
                TeachHours = BigMac2003$TeachHours[-bigInds]
                )

remove(BigMac2003)
attach(a)