source('../../Data/data_loaders.R')

DF = load_appendix_sediment_data()

# Part (a):
#
m = lm( YIELD ~ RUN + PREC, data=DF )
print( summary(m) )

# Part (b):
#
#postscript("../../WriteUp/Graphics/Chapter4/ex_4_6_residuals_vs_fitted.eps", onefile=FALSE, horizontal=FALSE)
plot( m, which=1 )
#dev.off()

# Part (c):
#
library(MASS)

#postscript("../../WriteUp/Graphics/Chapter4/ex_4_6_boxcox.eps", onefile=FALSE, horizontal=FALSE)
bc = boxcox(m)
#dev.off()

source('../Chapter3/utils.R')

atk_res = Atkinsons( DF, 'YIELD', c('RUN', 'PREC') )
print(sprintf("Atkinsons lambda estimate: %.3f (t_value %.3f)", atk_res$lambda_hat, atk_res$gamma_t))
bt_res = Box_Tidwell( DF, 'YIELD', c('RUN', 'PREC') )
print(bt_res)


# Part (d):
#
LDF = log(DF)
colnames(LDF) = c('LRUN', 'LPREC', 'LYIELD')

m_logx_logy = lm( LYIELD ~ LRUN + LPREC, data=LDF ) # a logarithmic transformation applied to everything
print( summary( m_logx_logy ) )

# Part (e):
#
cm = data.frame( t(colMeans(DF)) )
cm$YIELD = NULL
print(cm)

p = predict( m, newdata=cm, interval='prediction' )
lcm = log(cm) # we take the means first and then the logarithm of these means
colnames(lcm) = c('LRUN', 'LPREC')
p_log = predict( m_logx_logy, newdata=lcm, interval='prediction' )
exp(p_log)
R = rbind( p, exp(p_log) )
rownames(R) = c('orig_model', 'log_model')
R = cbind( R, range= (R[,'upr'] - R[,'lwr']) )
print(R)

# Part (f):
#
#postscript("../../WriteUp/Graphics/Chapter4/ex_4_6_residuals_vs_fitted_log_model.eps", onefile=FALSE, horizontal=FALSE)
plot( m_logx_logy, which=1 )
#dev.off()