# Written by:
#  --
#  John L. Weatherwax 2009-04-21
#
#  email: wax@alum.mit.edu
#
#  Please send comments and especially bug reports to the
#  above email address.
#
#-----

# Ex 13.26 (EPage 525):
#
# Quadratic fit of bulk density on moisture content; CI for the mean
# response at a fixed x, using the book's reported y-hat (491.10) and
# its estimated standard error (6.52).
DF <- read.csv( "../../Data/CH13/ex13-26.txt", header=TRUE, quote="'" )
plot( DF$moiscont, DF$bulkdens )

n <- 6
k <- 2  # quadratic model => two predictor terms
t_value <- qt( 1-0.01/2, n-k-1 )  # 99% two-sided critical value on n-(k+1) df
491.10 + c(-1,+1) * 6.52 * t_value

# Ex 13.27 (EPage 526):
#
DF <- read.csv( "../../Data/CH13/ex13-27.txt", header=TRUE, quote="'" )
plot( DF$x, DF$y )

# Predicted value and residual at x = 6 from the book's quadratic fit:
x <- 6
y_hat <- 84.482 - 15.875 * x + 1.7679 * x^2
e <- 53 - y_hat

# Coefficient of determination:
SSE <- 61.77
SST <- sum( ( DF$y - mean(DF$y) )^2 )
r2 <- 1 - SSE/SST

# Diagnostics on the book's standardized residuals:
stand_resids <- c( 1.91, -1.95, -0.25, 0.58, 0.90, 0.04, -0.66, 0.2 )
plot( DF$x, stand_resids )
qqnorm( stand_resids )
qqline( stand_resids )

# Confidence interval for the mean response at x = 6 ...
s_yhat <- 1.69
n <- length(DF$x)
k <- 2
t_value <- qt( 1-0.05/2, n-(k+1) )
ci <- y_hat + c(-1,+1) * s_yhat * t_value

# ... and the prediction interval for a new observation there:
MSE <- SSE / (n-(k+1))
s <- sqrt( MSE )
pi <- y_hat + c(-1,+1) * sqrt( s_yhat^2 + s^2 ) * t_value

# Ex 13.28 (EPage 526):
#
n <- 6
y_hat_fn <- function(x){ -113.0937 + 3.3684*x - 0.0178 * x**2 }
y_hat_fn( 75 )
y_hat_fn( 60 )

# SSE via the computational formula sum(y^2) - b0*sum(y) - b1*sum(xy) - b2*sum(x^2 y):
SSE <- 8386.43 - (-113.0937) * 210.7 - (3.3684) * 17002 - (- 0.0178) * 1419780
k <- 2
MSE <- SSE / ( n-(k+1) )
s <- sqrt(MSE)

SST <- 987.35
r2 <- 1 - SSE / SST

# t statistic for the quadratic coefficient:
beta_2_hat <- - 0.0178
s_hat_beta_2 <- 0.00226
t_beta_2 <- beta_2_hat / s_hat_beta_2
pt( t_beta_2, n-(k+1) ) # the probabilty that we could get a value this low or lower by chance

# Ex 13.29 (EPage 526):
#
DF <- read.csv( "../../Data/CH13/ex13-29.txt", header=TRUE, quote="'" )
plot( DF$x, DF$y )

n <- length(DF$x)
y_hat_fn <- function(x){ -295.96 + 2.1885 * x - 0.0031662 * x^2 }
y_hat <- y_hat_fn( DF$x )
DF$y - y_hat  # residuals

SSE <- sum( ( DF$y - y_hat )^2 )
k <- 2
s2 <- SSE/(n-(k+1))
SST <- sum( (DF$y - mean(DF$y))^2 )
r2 <- 1 - SSE/SST

# One-sided p-value for the quadratic coefficient:
s_beta_2_hat <- 0.0004835
t_beta_2 <- - 0.0031662/s_beta_2_hat
pt( t_beta_2, n-(k+1) )

# Bonferroni joint intervals for beta_1 and beta_2: split alpha over the
# two intervals (the second assignment below is the one actually used).
s_beta_1_hat <- 0.4050
alpha <- 0.05/2 # gives 95% combined confidence interval
alpha <- 0.04/2 # gives 96% combined confidence interval
t_value <- qt( 1 - alpha/2, n-(k+1) ) # need 1/2 to get two sided confidence intervals
print( 2.1885 + c(-1,+1) * s_beta_1_hat * t_value )
print( - 0.0031662 + c(-1,+1) * s_beta_2_hat * t_value )

# 95% CI and PI for the response at x = 400:
s_y_hat <- 1.198
t_value <- qt( 1 - 0.05/2, n-(k+1) )
ci <- y_hat_fn( 400 ) + c(-1,+1) * s_y_hat * t_value
pi <- y_hat_fn( 400 ) + c(-1,+1) * sqrt( s2 + s_y_hat^2 ) * t_value

# Ex 13.30 (EPage 526):
#
#DF = read.csv( "../../Data/CH13/ex13-30.txt", header=TRUE, quote="'" ) # this data does not match that given in the problem
n <- 14
y_hat_fn <- function(x){ -251.6 + 1000.1 * x - 135.44 * x^2 }

# 95% CI for the quadratic coefficient:
beta_2_hat <- -135.44
s_beta_2_hat <- 41.97
k <- 2
t_value <- qt( 1-0.05/2, n-(k+1) )
ci <- beta_2_hat + c(-1,+1) * s_beta_2_hat * t_value

# Test whether the mean response at x = 2.5 differs from 1500:
s_y_hat <- 53.5
y_hat <- y_hat_fn( 2.5 )
t_value <- ( y_hat - 1500 )/s_y_hat
qt( 0.05, n-(k+1) )  # lower-tail critical value to compare against

# Prediction interval at x = 2.5:
t_value <- qt( 1-0.05/2, n-(k+1) )
pi <- y_hat_fn( 2.5 ) + c(-1,+1) * s_y_hat * t_value

# Ex 13.31 (EPage 527/Anwser EPage 706):
#
#DF = read.csv( "../../Data/CH13/ex13-31.txt", header=TRUE, quote="'" ) # this data does not match that given in the problem
DF <- data.frame( x=c(1, 1, 2, 4, 4, 4, 6),
                  y=c(23., 24.5, 28., 30.9, 32.0, 33.6, 20.0) )
plot( DF$x, DF$y )

m <- lm( y ~ x + I(x^2), data=DF )
sm <- summary(m)

# the predicted values and residuals:
y_hat <- m$fitted.values
residuals <- m$residuals
plot( DF$x, residuals )

# Standardized residuals: residual i has variance s^2 - s_{yhat_i}^2,
# with the per-point s_{yhat_i} values taken from the book.
sigma_y_hats <- c(0.955,0.955,0.712,0.777,0.777,0.777,1.407)
var_residuals <- sm$sigma^2 - sigma_y_hats^2
std_residuals <- sqrt( var_residuals )
standardized_residuals <- residuals / std_residuals
plot( DF$x, standardized_residuals )
standardized_residuals
residuals/sm$sigma  # compare with the crude e_i / s scaling

# Prediction interval at x = 4:
n <- length(DF$x)
k <- 2
t_value <- qt( 1 - 0.05/2, n-(k+1) )
y_hat_sigma <- 0.777
pi <- predict( m, newdata=data.frame( x=4 ) ) +
      c(-1,+1) * sqrt( y_hat_sigma^2 + sm$sigma^2 ) * t_value

# Ex 13.32 (EPage 527/Anwser EPage 706):
#
DF <- read.csv( "../../Data/CH13/ex13-32.txt", header=TRUE, quote="'" )
n <- length(DF$x)
plot( DF$x, DF$y )
# estimate the mean of x and subtract it from each sample:
# (Ex 13.32 continued) Fit a cubic in the centered predictor x' = x - x_bar:
x_bar <- mean(DF$x)
DF$xp <- DF$x - x_bar # x prime
m <- lm( y ~ xp + I(xp^2) + I(xp^3), data=DF )
sm <- summary(m)

# Convert a centered-model coefficient back to the uncentered scale:
( -3*x_bar ) * (-2.3968) + 2.3964

# Prediction at x = 4.5 (centered before being passed to the model):
predict( m, newdata=data.frame( xp=(4.5-x_bar) ) )

# Ex 13.33 (EPage 527/Anwser EPage 706):
#
DF <- read.csv( "../../Data/CH13/ex13-33.txt", header=TRUE, quote="'" )
n <- length(DF$x)

# Standardize using the *population* standard deviation (divide by n, not n-1),
# matching the book's standardization for this exercise:
x_mean <- mean(DF$x)
x_std <- sqrt( (n-1) * sd(DF$x)^2 / n )

y_hat_fn <- function(x){ 0.9671 - 0.0502 * x - 0.0176 * x^2 + 0.0062 * x^3 }
# Bug fix: the original evaluated these at (20-x_bar)/x_std and (25-x_bar)/x_std,
# but x_bar was a stale mean left over from Ex 13.32's data; x_mean (defined
# above for the ex13-33 data, and otherwise unused) is what was intended.
y_hat_fn( (20-x_mean)/x_std )
y_hat_fn( (25-x_mean)/x_std )

# t test of the cubic coefficient (7 points, 4 estimated parameters):
t_value <- 0.0062 / 0.0031
qt( 1-0.05/2, 7-4 )

# R^2 for the cubic and quadratic models:
SST <- sum( ( DF$y - mean(DF$y) )^2 )
SSE_3 <- 0.000063
R2_3 <- 1 - SSE_3/SST # R^2 for cubic model
SSE_2 <- 0.00014367
R2_2 <- 1 - SSE_2/SST # R^2 for the quadradic model

# Compute the adjusted R^2 for each model:
# (algebraically equivalent to 1 - (1-R^2)(n-1)/(n-1-k))
k <- 3
R2_adj_3 <- ((n-1)*R2_3 - k)/(n-1-k) # the cubic model
k <- 2
R2_adj_2 <- ((n-1)*R2_2 - k)/(n-1-k) # the quadradic model

# Ex 13.34 (EPage 528/Anwser EPage 706):
#
DF <- read.csv( "../../Data/CH13/ex13-34.txt", header=TRUE, quote="'" )
n <- length(DF$x)

# Standardize the predictor (sample standard deviation here, unlike Ex 13.33):
x_bar <- mean(DF$x)
s_x <- sd(DF$x)
DF$xp <- ( DF$x - x_bar ) / s_x

# given in the book
beta_0 <- 0.8733
beta_1 <- -0.3255
beta_2 <- 0.0448
y_hat_fn <- function(x){ beta_0 + beta_1 * x + beta_2 * x^2 }
y_hat_fn( ( 50-x_bar ) / s_x )

# R^2 via the computational formula for SSE:
SST <- sum( ( DF$y - mean(DF$y) )^2 )
SSE <- sum( DF$y^2 ) - beta_0 * sum( DF$y ) -
       beta_1 * sum( DF$y * DF$xp ) - beta_2 * sum( DF$y * DF$xp^2 )
R2 <- 1 - SSE/SST

# t test of the quadratic coefficient:
t_value <- beta_2 / 0.0319
k <- 2
qt( 1-0.05/2, n-(k+1) )

# Ex 13.35 (EPage 527/Anwser EPage 706):
#
# NOTE(review): this reads ex13-34.txt — presumably Ex 13.35 reuses the
# Ex 13.34 data with a log-transformed response; confirm against the text.
DF <- read.csv( "../../Data/CH13/ex13-34.txt", header=TRUE, quote="'" )
n <- length(DF$x)
DF$ly <- log(DF$y)
m <- lm( ly ~ x + I(x^2), data=DF )
sm <- summary(m)
sm