#
# Epage 164
#
# Written by:
# -- 
# John L. Weatherwax                2009-04-21
# 
# email: wax@alum.mit.edu
# 
# Please send comments and especially bug reports to the
# above email address.
# 
#-----

# alr3 supplies the salarygov data set and the transformation helpers
# (inv.tran.plot, inv.tran.estimate, powtran, bctrans) used below.
#
# library() is called unconditionally: the original R/S-Plus branch loaded the
# same package either way, and unlike require() -- which only returns FALSE
# with a warning -- library() stops immediately when the package is missing,
# so the script cannot run on into a confusing failure further down.

library(alr3)

data(salarygov)

# NOTE: salarygov is deliberately NOT attach()-ed here. An attached copy is a
# snapshot: it would not see columns added to salarygov later in this script.
# Every call below therefore reads its variables through with() or data=,
# which also keeps the plot axis labels as "Score" / "MaxSalary".

# 7.4.2:
#
# Scatter plot of the untransformed data, saved for the write-up.
#
postscript("../../WriteUp/Graphics/Chapter7/prob_4_orig_scatter_plot.eps", onefile=FALSE, horizontal=FALSE)

with( salarygov, plot( Score, MaxSalary ) )

dev.off()

# Let's begin with a power transformation of the Score variable:
#
postscript("../../WriteUp/Graphics/Chapter7/prob_4_dependent_var_transform.eps", onefile=FALSE, horizontal=FALSE)

ans <- with( salarygov, inv.tran.plot( Score, MaxSalary, lam=c(-1,0,+1) ) )

dev.off()

# print() explicitly so the estimate is also shown when this file is source()-d
# (matching the print(summary(mxt)) call below).
print( unlist( with( salarygov, inv.tran.estimate(Score,MaxSalary) ) ) )

# First entry of the lambda vector returned by inv.tran.plot.
# NOTE(review): presumably this is the fitted (optimal) power, listed ahead of
# the user-supplied values -1, 0, +1 -- confirm against the alr3 documentation.
optLambda <- ans$lambda[1]

# Let's get these transformed Score values and try to find a transform of the Y variable:
#
scoreT <- with( salarygov, powtran(Score,optLambda,family="box.cox",modified=FALSE) )

# scoreT lives in the workspace, MaxSalary is taken from salarygov.
mxt <- lm( MaxSalary ~ scoreT, data=salarygov )
print(summary(mxt))

# Response on the horizontal axis against the fitted values from mxt.
# No postscript device is open at this point, so this plot is drawn on the
# current/default device and is not saved with the other figures.
with( salarygov, inv.tran.plot( MaxSalary, predict(mxt) ) )

print( unlist( with( salarygov, inv.tran.estimate( MaxSalary, predict(mxt) ) ) ) )

# 7.4.3 ... skipped
#       ... I was not sure how to apply these transformations with factors
#
# What is done instead: job classes whose (smoothed) fraction of women exceeds
# the threshold below are flagged, and MaxSalary is regressed on that flag plus
# Score with no common intercept term.
#
percentToBeFemaleDominated <- 0.7

# Raw ratio, kept for reference:
#salarygov$pcntWomen <- salarygov$NW / salarygov$NE
#
# One is added to both counts so the ratio cannot be exactly zero when NW is 0
# (original note: "we add one to avoid pcntWomen values that are too low").
salarygov$pcntWomen <- with( salarygov, (NW + 1) / (NE + 1) )

# TRUE for the classes counted as female dominated.
inds <- salarygov$pcntWomen > percentToBeFemaleDominated

# Unordered two-level factor built from the logical flag.
salarygov$wFactor <- factor( inds, ordered = FALSE )

# The "-1" drops the intercept from the model formula.
fm <- lm( MaxSalary ~ -1 + wFactor + Score, data = salarygov )

# 7.4.4 ... use both NW/NE (as pcntWomen) and Score to predict MaxSalary.
#
# NOTE (original author): these don't look correct ... the transformed result
# is worse than the untransformed result. Both summaries are printed below so
# the comparison can be checked.
#
# salarygov is not attach()-ed again here: every call names the data frame
# explicitly (data= or with()), so the pcntWomen column added to salarygov
# earlier in the script is picked up directly.
#

# Baseline: a linear model with no transformations.
m2 <- lm( MaxSalary ~ Score + pcntWomen, data=salarygov )
print(summary(m2))

pairs( MaxSalary ~ Score + pcntWomen, data=salarygov )
ans <- bctrans( MaxSalary ~ Score + pcntWomen, data=salarygov )

#plot(ans)

s <- summary(ans)

# NOTE(review): assumes the bctrans summary exposes the optimiser result under
# $optim, with $par holding one estimated power per right-hand-side variable
# in formula order (Score, pcntWomen) -- confirm against the alr3 documentation.
optLambda <- s$optim$par

scoreT <- with( salarygov, powtran( Score, optLambda[1] ) )
pcntWomenT <- with( salarygov, powtran( pcntWomen, optLambda[2] ) )

# Fit on the transformed predictors computed just above (the original
# recomputed the same powtran() values inline and left these two unused).
m3 <- lm( MaxSalary ~ scoreT + pcntWomenT, data=salarygov )
print(summary(m3))