#
# Epage 164
#
# Written by:
# --
# John L. Weatherwax                2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
#-----

# Purpose: exploratory transformations for problem 7.4 using the `salarygov`
# data set from the alr3 package (power transform of Score, then of the
# response, then a joint transform using Score and the fraction of women).
#
# The original script branched on version$language ("R" for R, NULL for
# SPlus) but loaded the same package in both branches.  library() works in
# both and, unlike require(), stops immediately if alr3 is not installed.
library(alr3)

data(salarygov)

# NOTE: the data frame is deliberately NOT attach()ed.  Columns are reached
# through with(), `data =` or `salarygov$...`, so columns added further down
# (pcntWomen, wFactor) are always the ones actually used.

# 7.4.2:
#
# postscript("../../WriteUp/Graphics/Chapter7/prob_4_orig_scatter_plot.eps", onefile=FALSE, horizontal=FALSE)
with(salarygov, plot(Score, MaxSalary))
dev.off() # closes the current device (finalizes the EPS file when postscript() above is enabled)

# lets begin with a power transformation of the Score variable:
#
# postscript("../../WriteUp/Graphics/Chapter7/prob_4_dependent_var_transform.eps", onefile=FALSE, horizontal=FALSE)
ans <- with(salarygov, inv.tran.plot(Score, MaxSalary, lam = c(-1, 0, +1)))
dev.off()

unlist(with(salarygov, inv.tran.estimate(Score, MaxSalary)))

# NOTE(review): this takes the first entry of ans$lambda as the fitted power;
# confirm against the alr3 documentation that the estimate is listed first.
optLambda <- ans$lambda[1]

# lets get these transformed Score values and try to find a transform of the Y variable:
#
scoreT <- powtran(salarygov$Score, optLambda, family = "box.cox", modified = FALSE)

mxt <- lm(MaxSalary ~ scoreT, data = salarygov)
print(summary(mxt))

with(salarygov, inv.tran.plot(MaxSalary, predict(mxt)))
unlist(with(salarygov, inv.tran.estimate(MaxSalary, predict(mxt)))) # note that ... (original note left unfinished)

# 7.4.3 ... skipped
# ... I was not sure how to apply these transformations with factors
#
percentToBeFemaleDominated <- 0.7

#salarygov$pcntWomen <- salarygov$NW / salarygov$NE
salarygov$pcntWomen <- (salarygov$NW + 1) / (salarygov$NE + 1) # we add one to avoid pcntWomen values that are too low.

# TRUE for rows whose fraction of women exceeds the threshold above.
inds <- salarygov$pcntWomen > percentToBeFemaleDominated
salarygov$wFactor <- factor(inds, ordered = FALSE)

# No-intercept fit: one level term per wFactor value plus a common Score slope.
fm <- lm(MaxSalary ~ -1 + wFactor + Score, data = salarygov)

# 7.4.4 ... use both NW/NE and Score to predict MaxSalary ... these don't look correct ...
# the transformed result is worse than the untransformed result.
#
m2 <- lm(MaxSalary ~ Score + pcntWomen, data = salarygov) # consider a linear model with no transformations
summary(m2)

pairs(MaxSalary ~ Score + pcntWomen, data = salarygov)

ans <- bctrans(MaxSalary ~ Score + pcntWomen, data = salarygov)
#plot(ans)
s <- summary(ans)

# NOTE(review): assumes s$optim$par holds the estimated powers in the order
# (Score, pcntWomen); verify against the bctrans summary structure.
optLambda <- s$optim$par

scoreT     <- powtran(salarygov$Score, optLambda[1])
pcntWomenT <- powtran(salarygov$pcntWomen, optLambda[2])

# Fit on the transformed predictors computed just above (the original
# recomputed the same powtran() terms inline and left these vectors unused).
m3 <- lm(MaxSalary ~ scoreT + pcntWomenT, data = salarygov)