# # Epage 164 # # Written by: # -- # John L. Weatherwax 2009-04-21 # # email: wax@alum.mit.edu # # Please send comments and especially bug reports to the # above email address. # #----- # version$language == "R" for R version$language == NULL for SPlus if(is.null(version$language) == FALSE){ require(alr3) }else{ library(alr3) } data(BigMac2003) attach(BigMac2003) # 7.5.1 (all data points): # postscript("../../WriteUp/Graphics/Chapter7/prob_5_orig_scatter_plot.eps", onefile=FALSE, horizontal=FALSE) plot( FoodIndex, BigMac ) dev.off() m0 <- lm( FoodIndex ~ BigMac ) s0 <- summary(m0) # look for a transformation of the dependent variable FoodIndex: # inv.tran.plot( FoodIndex, BigMac, lam=c(-1,0,+1) ) unlist( inv.tran.estimate(FoodIndex,BigMac) ) foodIndexT <- powtran( FoodIndex, 0, family="box.cox",modified=TRUE) # try to find a transformation of the response "BigMac" # m1 <- lm( BigMac ~ foodIndexT ) s1 <- summary(m1) postscript("../../WriteUp/Graphics/Chapter7/prob_5_inv_trans_plot.eps", onefile=FALSE, horizontal=FALSE) ans <- inv.tran.plot( BigMac, predict(m1), lam=c(-1,0,1), key=c(145,20) ) dev.off() unlist( inv.tran.estimate(BigMac, predict(m1)) ) bigMacT <- log(BigMac) # fit this log-log nonlinear model: # m2 <- lm( bigMacT ~ foodIndexT ) s2 <- summary(m2) postscript("../../WriteUp/Graphics/Chapter7/prob_5_log_foodIndex_log_bigMac.eps", onefile=FALSE, horizontal=FALSE) plot(foodIndexT,bigMacT) abline(m2) dev.off() # compare the fit for each model: # print( c( s0$r.squared, s1$r.squared, s2$r.squared ) ) # 7.5.2 (all data points): # postscript("../../WriteUp/Graphics/Chapter7/prob_5_orig_scatter_plot_matrix.eps", onefile=FALSE, horizontal=FALSE) pairs( ~Rice+Bread+BigMac ) dev.off() ans <- bctrans(BigMac~Rice+Bread,data=BigMac2003) summary(ans) postscript("../../WriteUp/Graphics/Chapter7/prob_5_trans_scatter_plot_matrix.eps", onefile=FALSE, horizontal=FALSE) plot(ans) dev.off() # for the next part of this problem lets estimate the optimal powers for the suggested variables: # # 7.5.3 the inverse fitted value plot (all data points) # b <- BigMac2003 b$logBread <- log(Bread) b$logBus <- log(Bus) b$logTeachGI <- log(TeachGI) b$AptToPower <- Apt**0.33 m3 <- lm( BigMac ~ logBread+logBus+logTeachGI+AptToPower, data=b ) # the direct linear regression of BigMac on these variables s3 <- summary(m3) postscript("../../WriteUp/Graphics/Chapter7/prob_5_3_inv_trans_plot.eps", onefile=FALSE, horizontal=FALSE) ans <- inv.tran.plot( BigMac, predict(m3), lam=c(-1,0,1), key=c(140,20) ) dev.off() # based on the standard error ... 0 is not unreasnable ... but lets take the estimated value unlist( inv.tran.estimate(BigMac, predict(m3)) ) optLambda <- ans$lambda[1] # lets see how well this does when we fit with this optimal # b$BigMacToPower <- b$BigMac**(optLambda) m4 <- lm( BigMacToPower ~ logBread+logBus+logTeachGI+AptToPower, data=b ) s4 <- summary(m4) print( c(s3$r.squared,s4$r.squared) ) # plot the new inverse response plot ... something is not correct about what I'm doing here ... # #postscript("../../WriteUp/Graphics/Chapter7/prob_5_3_inv_trans_plot_second_time.eps", onefile=FALSE, horizontal=FALSE) #inv.tran.plot( BigMac, predict(m4)**(1/optLambda) ) #plot( BigMac, predict(m4)**(1/optLambda) ) #dev.off() # find the largest two indexes (if we want to remove them): # # These two cities are: Karachi & Nairobi # inds1 <- BigMac2003$BigMac==132 inds2 <- BigMac2003$BigMac==185 bigInds <- inds1 | inds2 BigMac2003$City[bigInds] # Lets delete the outliers: a <- data.frame( City = BigMac2003$City[-bigInds], BigMac = BigMac2003$BigMac[-bigInds], Bread = BigMac2003$Bread[-bigInds], Rice = BigMac2003$Rice[-bigInds], FoodIndex = BigMac2003$FoodIndex[-bigInds], Bus = BigMac2003$Bus[-bigInds], Apt = BigMac2003$Apt[-bigInds], TeachGI = BigMac2003$TeachGI[-bigInds], TeachNI = BigMac2003$TeachNI[-bigInds], TaxRate = BigMac2003$TaxRate[-bigInds], TeachHours = BigMac2003$TeachHours[-bigInds] ) remove(BigMac2003) attach(a)