# # Epage 163 # # Written by: # -- # John L. Weatherwax 2009-04-21 # # email: wax@alum.mit.edu # # Please send comments and especially bug reports to the # above email address. # #----- # version$language == "R" for R version$language == NULL for SPlus if(is.null(version$language) == FALSE){ require(alr3) }else{ library(alr3) } data(water) attach(water) # 7.3.1: # postscript("../../WriteUp/Graphics/Chapter7/prob_3_orig_scatter_plot.eps", onefile=FALSE, horizontal=FALSE) pairs(~APMAM+APSAB+APSLAKE+OPBPC+OPRC+OPSLAKE+BSAAM) #library(car) #scatterplot.matrix(BSAAM~APMAM+APSAB+APSLAKE+OPBPC+OPRC+OPSLAKE) dev.off() # pick values such that the independent variables are transformed to a set of predictors as close to # linearly related as possible # ans <- bctrans(BSAAM~APMAM+APSAB+APSLAKE+OPBPC+OPRC+OPSLAKE, data=water) summary(ans) # plot a scatter plot matrix of the transformed variables (all values of $lambda=0$ i.e. a logarithmic transformation): # postscript("../../WriteUp/Graphics/Chapter7/prob_3_trans_scatter_plot.eps", onefile=FALSE, horizontal=FALSE) #plot(ans) plot(ans,lambda=c(0,0,0,0,0,0)) dev.off() # lets get these transformed variables into a new data frame this creates logXYZ columns: # waterT <- cbind( BSAAM, powtran(ans,lambda=c(0,0,0,0,0,0),family="power") ) # lets now look for a transformation of the response BSAAM: # mxt <- lm( BSAAM~logAPMAM+logAPSAB+logAPSLAKE+logOPBPC+logOPRC+logOPSLAKE, data=waterT ) #inv.tran.plot( BSAAM, predict(mxt), key=c(12000,80000) ) # or the easier to type: # postscript("../../WriteUp/Graphics/Chapter7/prob_3_inv_res_plot.eps", onefile=FALSE, horizontal=FALSE) inv.res.plot(mxt) dev.off() # what is the standard error of this estimate of $\lambda_Y$ unlist( inv.tran.estimate(BSAAM,predict(mxt)) ) # lets fit the model suggested: # waterT$logBSAAM <- log(waterT$BSAAM) m <- lm( logBSAAM ~ logAPMAM+logAPSAB+logAPSLAKE+logOPBPC+logOPRC+logOPSLAKE, data=waterT ) summary(m) # lets consider the simpler model where the three cofficients of logOPBPC,logOPRC,logOPSLAKE are approximatly equal: # ms <- lm( logBSAAM ~ logAPMAM+logAPSAB+logAPSLAKE+I(logOPBPC+logOPRC+logOPSLAKE), data=waterT ) # now compare the two models: # anova(ms,m) # mss <- lm( logBSAAM ~ logAPSLAKE+I(logOPBPC+logOPRC+logOPSLAKE), data=waterT ) anova(mss,ms,m)