# Section 3; Question 1:
#
# Fit a straight line to four (age, distance) observations, draw the scatter
# plot with the fitted line on top, and report the estimated coefficients and
# the p-value found in the slope row of the summary table.
age <- seq_len(4)
distance <- c(12.6, 11.6, 6.8, 9.2)

m <- lm(distance ~ age)

plot(
  age, distance,
  type = "p", pch = 19, cex = 1.5,
  xlab = "Age", ylab = "Distance"
)
abline(m, col = "red")
grid()

sm <- summary(m)
print(coef(m))

# Row 2 of the coefficient table is the `age` term; column 4 is Pr(>|t|).
p_value <- coef(sm)[2, 4]
print(sprintf("p_value= %.4f", p_value))


# Section 3; Question 2:
#
# 95% confidence interval for the slope of Graduation_Rate on
# Spending_Per_Pupil, followed by an EPS scatter plot with the fitted line.
source("chap_11_sect_2_question_7_data.R")
n <- nrow(DF)

# Estimates are entered by hand rather than read from a fit.
# NOTE(review): presumably copied from an earlier fit of this same data set;
# verify against that output.
beta_hat_0 <- 81.088
beta_hat_1 <- 0.412
s <- 11.78848
alpha <- 0.05

t_crit <- qt(1 - alpha / 2, n - 2)

# Square root of the centered sum of squares of the predictor.
denom <- with(
  DF,
  sqrt(sum((Spending_Per_Pupil - mean(Spending_Per_Pupil))^2))
)

# Interval is beta_hat_1 -/+ t_crit * s / denom.
half_width <- (s / denom) * t_crit
ci <- c(beta_hat_1 - half_width, beta_hat_1 + half_width)
print(ci)

postscript(
  "../../WriteUp/Graphics/Chapter11/chap_11_3_2_plot.eps",
  onefile = FALSE, horizontal = FALSE
)
with(
  DF,
  plot(
    Spending_Per_Pupil, Graduation_Rate,
    type = "p", pch = 19, cex = 1.5,
    xlab = "Spending Per Pupil", ylab = "Graduation Rate"
  )
)
m <- lm(Graduation_Rate ~ Spending_Per_Pupil, data = DF)
abline(m, col = "red")
grid()
dev.off()


# Section 3; Question 3:
#
# Regress Temperature on Chirps_Per_Second. The summary is stored in `sm`
# but not printed here.
source("chap_11_sect_2_question_1_data.R")
m <- lm(Temperature ~ Chirps_Per_Second, data = DF)
sm <- summary(m)


# Section 3; Question 5:
#
# With x = 1, ..., n and error variance sigma2, se_beta_1 is
# sqrt(sigma2 / sum((x - mean(x))^2)). The reported value is
# P(|Z| < 1.5 / se_beta_1) for a standard normal Z.
# NOTE(review): the 1.5 tolerance and sigma2 = 45 presumably come from the
# problem statement -- confirm.
n <- 9
x <- seq_len(n)
sigma2 <- 45
se_beta_1 <- sqrt(sigma2 / sum((x - mean(x))^2))

# Print explicitly: a bare top-level expression is not auto-printed when the
# script is run through source(), so the answer would otherwise be lost.
prob_within <- pnorm(1.5 / se_beta_1) - pnorm(-1.5 / se_beta_1)
print(prob_within)


# Section 3; Question 7:
#
# 90% confidence interval for the intercept of Parts_Dissolved on Temperature,
# built from the estimate and standard error in the summary coefficient table.
source("chap_11_sect_2_question_3_data.R")
m <- lm(Parts_Dissolved ~ Temperature, data = DF)
sm <- summary(m)
n <- nrow(DF)

# Row 1 is the intercept; columns 1 and 2 are estimate and standard error.
coef_table <- coef(sm)
beta_0_hat <- coef_table[1, 1]
se_beta_0 <- coef_table[1, 2]

alpha <- 0.10
t_crit <- qt(1 - alpha / 2, n - 2)

margin <- se_beta_0 * t_crit
ci <- c(beta_0_hat - margin, beta_0_hat + margin)
print(ci)


# Section 3; Question 8:
#
# Regress Cancer_Mortality on Index_of_Exposure. The summary is stored in
# `sm` but not printed here.
source("chap_11_sect_2_question_9_data.R")
m <- lm(Cancer_Mortality ~ Index_of_Exposure, data = DF)
sm <- summary(m)


# Section 3; Question 9:
#
# Regress Behavioral_Index on Plumage_Index. The summary is stored in `sm`
# but not printed here.
source("chap_11_sect_2_question_11_data.R")
m <- lm(Behavioral_Index ~ Plumage_Index, data = DF)
sm <- summary(m)


# Section 3; Question 13:
#
# Two-sided chi-square check of the residual variance against sigma2_0:
# the statistic (n - 2) * s2 / sigma2_0 is compared with the alpha/2 and
# 1 - alpha/2 quantiles of a chi-square with n - 2 degrees of freedom.
sigma2_0 <- 12.6
n <- 24
s2 <- 18.2

alpha <- 0.05

chi2_stat <- (n - 2) * s2 / sigma2_0

# Both acceptance limits from one vectorised quantile call.
acceptance_limits <- qchisq(c(alpha / 2, 1 - alpha / 2), n - 2)
ch2_H0_acceptance_lower_limit <- acceptance_limits[1]
ch2_H0_acceptance_upper_limit <- acceptance_limits[2]

print(
  sprintf(
    "H0: acceptance region (%.3f, %.3f); observed value: %.3f",
    ch2_H0_acceptance_lower_limit,
    ch2_H0_acceptance_upper_limit,
    chi2_stat
  )
)


# Section 3; Question 14:
#
# 90% confidence interval for the error variance of CHD_Mortality on
# Consumption: (n - 2) * S2 divided by the upper and lower chi-square
# quantiles with n - 2 degrees of freedom.
source("case_study_11_3_1_data.R")
m <- lm(CHD_Mortality ~ Consumption, data = DF)
sm <- summary(m)

n <- nrow(DF)
alpha <- 0.1
chi2_crit <- qchisq(c(alpha / 2, 1 - alpha / 2), n - 2)
chi2_crit_1 <- chi2_crit[1]
chi2_crit_2 <- chi2_crit[2]

S2 <- sm$sigma^2
# Dividing by the larger quantile gives the lower end of the interval.
ci <- (n - 2) * S2 / c(chi2_crit_2, chi2_crit_1)
print(ci)


# Section 3; Question 15:
#
# 90% confidence interval for the error variance of y_astronomical_units on
# x_years, using the chi-square quantiles with n - 2 degrees of freedom.
source("../Chapter8/chap_8_sect_2_question_1_data.R")
m <- lm(y_astronomical_units ~ x_years, data = DF)
sm <- summary(m)

n <- nrow(DF)
alpha <- 0.1
chi2_crit <- qchisq(c(alpha / 2, 1 - alpha / 2), n - 2)
chi2_crit_1 <- chi2_crit[1]
chi2_crit_2 <- chi2_crit[2]
S2 <- sm$sigma^2
# Dividing by the larger quantile gives the lower end of the interval.
ci <- (n - 2) * S2 / c(chi2_crit_2, chi2_crit_1)
print(ci)


# Section 3; Question 16:
#
# Regress Volume on Weight and print the 95% confidence interval and then
# the 95% prediction interval at Weight = 14.0.
source("chap_11_sect_3_question_16_data.R")
m <- lm(Volume ~ Weight, data = DF)
new_obs <- data.frame(Weight = 14.0)
for (interval_kind in c("confidence", "prediction")) {
  print(predict(m, newdata = new_obs, interval = interval_kind, level = 0.95))
}


# Section 3; Question 17:
#
# Regress Finished_Weight on Rough_Weight and print the 95% confidence
# interval for the fitted mean at Rough_Weight = 2.75.
source("case_study_11_2_1_data.R")
m <- lm(Finished_Weight ~ Rough_Weight, data = DF)
new_obs <- data.frame(Rough_Weight = 2.75)
print(predict(m, newdata = new_obs, interval = "confidence", level = 0.95))


# Section 3; Question 18:
#
# Regress CHD_Mortality on Consumption and print the 99% confidence interval
# for the fitted mean at Consumption = 2500.
source("case_study_11_3_1_data.R")
m <- lm(CHD_Mortality ~ Consumption, data = DF)
new_obs <- data.frame(Consumption = 2500)
print(predict(m, newdata = new_obs, interval = "confidence", level = 0.99))


# Section 3; Question 19:
#
# Regress Median_Salary on Tuition and print the 95% confidence interval and
# then the 95% prediction interval at Tuition = 102.
source("chap_11_sect_3_question_19_data.R")
m <- lm(Median_Salary ~ Tuition, data = DF)
new_obs <- data.frame(Tuition = 102)
for (interval_kind in c("confidence", "prediction")) {
  print(predict(m, newdata = new_obs, interval = interval_kind, level = 0.95))
}


# Section 3; Question 20:
#
# Regress Cancer_Mortality on Index_of_Exposure and print the 95% confidence
# interval and then the 95% prediction interval at Index_of_Exposure = 9.00.
source("chap_11_sect_2_question_9_data.R")
m <- lm(Cancer_Mortality ~ Index_of_Exposure, data = DF)
new_obs <- data.frame(Index_of_Exposure = 9.00)
for (interval_kind in c("confidence", "prediction")) {
  print(predict(m, newdata = new_obs, interval = interval_kind, level = 0.95))
}


# Section 3; Question 21:
#
# t test for equality of two regression slopes, computed from summary
# statistics of two groups. The residual sums of squares are pooled over
# n + m - 4 degrees of freedom and the two-sided p-value is reported.
n <- 6 # male managers
beta_hat_0 <- 21.3
beta_hat_1 <- 0.606
rss <- 5.983
scatter <- 31.33 # = sum_{i=1}^6 (x_i - bar_x)^2

m <- 8 # women managers
beta_hat_0_star <- 23.2
beta_hat_1_star <- 1.07
rss_star <- 13.804
scatter_star <- 46

# Pooled residual standard deviation across both fits.
s <- sqrt((rss + rss_star) / (n + m - 4))

# Difference in slopes over its estimated standard error.
numer <- beta_hat_1 - beta_hat_1_star
denom <- s * sqrt(1 / scatter + 1 / scatter_star)
t <- numer / denom

p_value <- 2 * (1 - pt(abs(t), n + m - 4))
print(sprintf("t= %.3f; p-value %.3f", t, p_value))


# Section 3; Question 22:
#
# t test for equality of the Percent ~ Years slopes fitted separately to the
# rows with Type 'Democratic' and Type 'Republican'. The residual sums of
# squares are pooled over n + m - 4 degrees of freedom.
source("chap_11_sect_3_question_22_data.R")

# First group: fit, sample size, residual sum of squares, predictor scatter.
df_demo <- DF[DF$Type == "Democratic", ]
m_demo <- lm(Percent ~ Years, data = df_demo)
n <- nrow(df_demo)
rss <- sum(m_demo$residuals^2)
scatter <- with(df_demo, sum((Years - mean(Years))^2))

# Second group: same quantities, suffixed with _star.
df_repu <- DF[DF$Type == "Republican", ]
m_repu <- lm(Percent ~ Years, data = df_repu)
m <- nrow(df_repu)
rss_star <- sum(m_repu$residuals^2)
scatter_star <- with(df_repu, sum((Years - mean(Years))^2))

# Pooled residual standard deviation across both fits.
s <- sqrt((rss + rss_star) / (n + m - 4))

# Element 2 of each coefficient vector is the Years slope.
numer <- coef(m_demo)[2] - coef(m_repu)[2]
denom <- s * sqrt(1 / scatter + 1 / scatter_star)
t <- numer / denom

p_value <- 2 * (1 - pt(abs(t), n + m - 4))
print(sprintf("t= %.3f; p-value= %.3f", t, p_value))