# Worked solutions for Section 3 (simple linear regression inference).
#
# Each question below is self-contained.  Questions that call source() expect
# the sourced data file to define a data frame named DF holding the columns
# referenced in that question.

# Section 3; Question 1:
#
# Least-squares fit of distance on age; plot the data with the fitted line and
# report the coefficients and the p-value recorded for the slope.
age = seq_len(4)
distance = c(12.6, 11.6, 6.8, 9.2)

plot(age, distance, type='p', pch=19, cex=1.5, xlab='Age', ylab='Distance')
m = lm(distance ~ age)
abline(m, col='red')
grid()

sm = summary(m)
print(coefficients(m))

# Row 2 of the coefficient table is the slope (age); column 4 is Pr(>|t|).
p_value = sm$coefficients[2, 4]
print(sprintf('p_value= %.4f', p_value))


# Section 3; Question 2:
#
# Two-sided 95% confidence interval for the slope, built from hard-coded
# estimates (beta_hat_1, s) and the spread of the predictor in DF.
source('chap_11_sect_2_question_7_data.R')

n = nrow(DF)

beta_hat_0 = 81.088
beta_hat_1 = 0.412
s = 11.78848

alpha = 0.05
t_crit = qt(1 - alpha/2, n - 2)

# sqrt of sum_i (x_i - bar_x)^2 for the predictor.
denom = sqrt(sum((DF$Spending_Per_Pupil - mean(DF$Spending_Per_Pupil))^2))
ci = beta_hat_1 + ((s / denom) * t_crit) * c(-1, +1)
print(ci)

# Write the scatter plot with the fitted line to an EPS file.
postscript("../../WriteUp/Graphics/Chapter11/chap_11_3_2_plot.eps", onefile=FALSE, horizontal=FALSE)
plot(DF$Spending_Per_Pupil, DF$Graduation_Rate, type='p', pch=19, cex=1.5,
     xlab='Spending Per Pupil', ylab='Graduation Rate')
m = lm(Graduation_Rate ~ Spending_Per_Pupil, data=DF)
abline(m, col='red')
grid()
dev.off()


# Section 3; Question 3:
#
# Fit only; the summary is stored in sm and nothing is printed.
source('chap_11_sect_2_question_1_data.R')

m = lm(Temperature ~ Chirps_Per_Second, data=DF)
sm = summary(m)


# Section 3; Question 5:
#
# With x = 1..n and error variance sigma2, se_beta_1 is the standard deviation
# of the slope estimate.  The printed value is
# pnorm(1.5/se_beta_1) - pnorm(-1.5/se_beta_1), i.e. the normal probability
# of falling within 1.5 of the centre.
n = 9
x = seq_len(n)
sigma2 = 45

se_beta_1 = sqrt(sigma2 / sum((x - mean(x))^2))
# Explicit print() so the value is also shown when this file is source()d.
print(pnorm(1.5/se_beta_1) - pnorm(-1.5/se_beta_1))


# Section 3; Question 7:
#
# Two-sided 90% confidence interval for the intercept.
source('chap_11_sect_2_question_3_data.R')

m = lm(Parts_Dissolved ~ Temperature, data=DF)
sm = summary(m)

n = nrow(DF)

# Row 1 of the coefficient table is the intercept: column 1 = estimate,
# column 2 = standard error.
beta_0_hat = sm$coefficients[1, 1]
se_beta_0 = sm$coefficients[1, 2]

alpha = 0.10
t_crit = qt(1 - alpha/2, n - 2)

ci = beta_0_hat + se_beta_0 * t_crit * c(-1, +1)
print(ci)


# Section 3; Question 8:
#
# Fit only; the summary is stored in sm and nothing is printed.
source('chap_11_sect_2_question_9_data.R')

m = lm(Cancer_Mortality ~ Index_of_Exposure, data=DF)
sm = summary(m)


# Section 3; Question 9:
#
# Fit only; the summary is stored in sm and nothing is printed.
source('chap_11_sect_2_question_11_data.R')

m = lm(Behavioral_Index ~ Plumage_Index, data=DF)
sm = summary(m)


# Section 3; Question 13:
#
# Two-sided chi-square test of sigma^2 = sigma2_0 at level alpha, using the
# statistic (n-2) * s2 / sigma2_0 on n-2 degrees of freedom.
sigma2_0 = 12.6

n = 24
s2 = 18.2

alpha = 0.05

chi2_stat = (n - 2) * s2 / sigma2_0

ch2_H0_acceptance_lower_limit = qchisq(alpha/2, n - 2)
ch2_H0_acceptance_upper_limit = qchisq(1 - alpha/2, n - 2)
print(sprintf('H0: acceptance region (%.3f, %.3f); observed value: %.3f',
              ch2_H0_acceptance_lower_limit, ch2_H0_acceptance_upper_limit, chi2_stat))


# Section 3; Question 14:
#
# Two-sided 90% confidence interval for sigma^2 from the residual variance.
source('case_study_11_3_1_data.R')

m = lm(CHD_Mortality ~ Consumption, data=DF)
sm = summary(m)

n = nrow(DF)

alpha = 0.1
chi2_crit_1 = qchisq(alpha/2, n - 2)
chi2_crit_2 = qchisq(1 - alpha/2, n - 2)

S2 = sm$sigma^2

# Dividing by the larger quantile gives the lower endpoint.
ci = c((n - 2) * S2 / chi2_crit_2, (n - 2) * S2 / chi2_crit_1)
print(ci)


# Section 3; Question 15:
#
# Same interval as Question 14, for a different data set.
source('../Chapter8/chap_8_sect_2_question_1_data.R')

m = lm(y_astronomical_units ~ x_years, data=DF)
sm = summary(m)

n = nrow(DF)

alpha = 0.1
chi2_crit_1 = qchisq(alpha/2, n - 2)
chi2_crit_2 = qchisq(1 - alpha/2, n - 2)

S2 = sm$sigma^2

ci = c((n - 2) * S2 / chi2_crit_2, (n - 2) * S2 / chi2_crit_1)
print(ci)


# Section 3; Question 16:
#
# 95% confidence interval for the mean response and 95% prediction interval
# for a new observation, both at Weight = 14.0.
source('chap_11_sect_3_question_16_data.R')

m = lm(Volume ~ Weight, data=DF)

print(predict(m, newdata=data.frame(Weight=14.0), interval='confidence', level=0.95))
print(predict(m, newdata=data.frame(Weight=14.0), interval='prediction', level=0.95))


# Section 3; Question 17:
#
# 95% confidence interval for the mean response at Rough_Weight = 2.75.
source('case_study_11_2_1_data.R')

m = lm(Finished_Weight ~ Rough_Weight, data=DF)

print(predict(m, newdata=data.frame(Rough_Weight=2.75), interval='confidence', level=0.95))


# Section 3; Question 18:
#
# 99% confidence interval for the mean response at Consumption = 2500.
source('case_study_11_3_1_data.R')

m = lm(CHD_Mortality ~ Consumption, data=DF)

print(predict(m, newdata=data.frame(Consumption=2500), interval='confidence', level=0.99))


# Section 3; Question 19:
#
# 95% confidence and prediction intervals at Tuition = 102.
source('chap_11_sect_3_question_19_data.R')

m = lm(Median_Salary ~ Tuition, data=DF)

print(predict(m, newdata=data.frame(Tuition=102), interval='confidence', level=0.95))
print(predict(m, newdata=data.frame(Tuition=102), interval='prediction', level=0.95))


# Section 3; Question 20:
#
# 95% confidence and prediction intervals at Index_of_Exposure = 9.00.
source('chap_11_sect_2_question_9_data.R')

m = lm(Cancer_Mortality ~ Index_of_Exposure, data=DF)

print(predict(m, newdata=data.frame(Index_of_Exposure=9.00), interval='confidence', level=0.95))
print(predict(m, newdata=data.frame(Index_of_Exposure=9.00), interval='prediction', level=0.95))


# Section 3; Question 21:
#
# Two-sided t test for equality of two regression slopes from hard-coded
# summary statistics, using a pooled residual standard deviation on
# n + m - 4 degrees of freedom.
m = 8  # women managers
beta_hat_0_star = 23.2
beta_hat_1_star = 1.07
rss_star = 13.804
scatter_star = 46

n = 6  # male managers
beta_hat_0 = 21.3
beta_hat_1 = 0.606
rss = 5.983
scatter = 31.33  # = sum_{i=1}^6 (x_i - bar_x)^2

s = sqrt((rss + rss_star) / (n + m - 4))

numer = beta_hat_1 - beta_hat_1_star
denom = s * sqrt(1/scatter + 1/scatter_star)
t = numer / denom
# Upper tail taken directly; avoids the cancellation in 1 - pt(...).
p_value = 2 * pt(abs(t), n + m - 4, lower.tail=FALSE)
print(sprintf('t= %.3f; p-value %.3f', t, p_value))


# Section 3; Question 22:
#
# Same slope-equality test as Question 21, with every quantity computed from
# separate fits to the 'Democratic' and 'Republican' rows of DF.
source('chap_11_sect_3_question_22_data.R')

df_demo = DF[DF$Type == 'Democratic', ]
m_demo = lm(Percent ~ Years, data=df_demo)

df_repu = DF[DF$Type == 'Republican', ]
m_repu = lm(Percent ~ Years, data=df_repu)

n = nrow(df_demo)
rss = sum(m_demo$residuals^2)
scatter = sum((df_demo$Years - mean(df_demo$Years))^2)

m = nrow(df_repu)
rss_star = sum(m_repu$residuals^2)
scatter_star = sum((df_repu$Years - mean(df_repu$Years))^2)

s = sqrt((rss + rss_star) / (n + m - 4))

# Select the slope by name so the result is a plain (unnamed) number.
numer = coefficients(m_demo)[['Years']] - coefficients(m_repu)[['Years']]
denom = s * sqrt(1/scatter + 1/scatter_star)
t = numer / denom
p_value = 2 * pt(abs(t), n + m - 4, lower.tail=FALSE)
print(sprintf('t= %.3f; p-value= %.3f', t, p_value))