# EPage 117
#
source('../../Data/data_loaders.R')

# Get the Forbes data:
#
DF_forbes = load_forbes_data() # fields are: pressure and boiling_point

# Get the Hooker data:
#
DF_hooker = load_appendix_Hookers_data() # fields are: TEMP and PRESS

# Combine the two:
#
DF = data.frame( TEMP=c(DF_forbes$boiling_point, DF_hooker$TEMP), PRESS=c(DF_forbes$pressure, DF_hooker$PRESS) )


# Part (a):
#
m = lm( PRESS ~ TEMP, data=DF )

# Lets plot the data and models:
#postscript("../../WriteUp/Graphics/Chapter3/ex_3_9_part_a.eps", onefile=FALSE, horizontal=FALSE)
par(mfrow=c(1,3))
plot( DF$TEMP, DF$PRESS, type='p', pch=19, cex=1.5, xlab='TEMP', ylab='PRESS', main='data and linear model' )
abline(m, col='red')
grid()
plot( m, which=1 )
grid()
plot( m, which=2 )
grid()
par(mfrow=c(1,1))
#dev.off()


# Part (b):
#
library(MASS)
bc = boxcox(m)

# What lambda value gives the largest log-likelihood:
#
print(sprintf("Boxcox lambda estimate: %.3f", bc$x[ which.max(bc$y) ]))

# Lets try Atkinsons method:
#
source('utils.R')

atk_res = Atkinsons( DF, 'PRESS', c( 'TEMP' ) )
print(sprintf("Atkinsons lambda estimate: %.3f (t-value %.3f)", atk_res$lambda_hat, atk_res$gamma_t))
 

DF$LOGPRESS = log(DF$PRESS)
m_log = lm( LOGPRESS ~ TEMP, data=DF )


# Part (c): Implement the Box-Tidwell method to study transformation of T_R
#
bt_res = Box_Tidwell( DF, 'PRESS', c( 'TEMP' ) )
print(sprintf("Box-Tidwell %s alpha estimate: %.3f (t-value %.3f)", bt_res$variable_name[1], bt_res$alpha[1], bt_res$t_value[1]))

m_bt = lm( PRESS ~ I(TEMP^4.8), data=DF )


# Plot the data and our three models:
#
#postscript("../../WriteUp/Graphics/Chapter3/ex_3_9_part_b.eps", onefile=FALSE, horizontal=FALSE)
plot( DF$TEMP, DF$PRESS, type='p', pch=19, cex=1.0, xlab='TEMP', ylab='PRESS' )
all_TEMPs = unique( DF$TEMP )
p = predict( m, newdata=data.frame( TEMP=all_TEMPs ) )
points( all_TEMPs, p, type='l', col='blue' )
p = predict( m_log, newdata=data.frame( TEMP=all_TEMPs ) )
points( all_TEMPs, exp(p), type='l', col='green' )
p = predict( m_bt, newdata=data.frame( TEMP=all_TEMPs ) )
points( all_TEMPs, p, type='l', col='purple' )
legend( 'topleft',
        c('PRESS = beta_0 + beta_1 TEMP', 'PRESS=exp(beta_0 + beta_1 TEMP)', 'PRESS=beta_0 + beta_1 TEMP^4.8'),
        col=c('blue', 'green', 'purple'),
        lty=c(1, 1, 1) )
grid()
#dev.off()