options(error=recover)

# Chi-square goodness-of-fit computations for Section 4, Questions 1-12.
#
# Each question below sources its own data file, estimates any model
# parameters, builds the expected bin counts under the fitted model and then
# hands the observed/expected counts to chisq_gof_report().

# Compute and print the Pearson chi-square goodness-of-fit summary.
#
# Arguments:
#   observed    - observed count in each bin
#   expected    - expected count in each bin under the model (same length)
#   alpha       - significance level used for the critical value
#   n_estimated - number of model parameters that were estimated from the data
#
# The degrees of freedom are length(expected) - 1 - n_estimated.
# Prints one line 'd= ...; d_critical= ...; p_value= ...' and invisibly
# returns list(d, d_crit, p_value, df).
# Stops if the two count vectors differ in length (R would otherwise recycle
# the shorter one) or if fewer than one degree of freedom remains.
chisq_gof_report = function( observed, expected, alpha, n_estimated ){
  if( length(observed) != length(expected) ){
    stop( sprintf( 'observed has %d bins but expected has %d bins',
                   length(observed), length(expected) ) )
  }
  df = length(expected) - 1 - n_estimated
  if( df < 1 ){
    stop( sprintf( 'chi-square test needs at least 1 degree of freedom; got %d', df ) )
  }
  d = sum( (observed - expected)^2 / expected )
  d_crit = qchisq( 1-alpha, df )
  # Ask for the upper tail directly instead of forming 1 - (lower tail).
  p_value = pchisq( d, df, lower.tail=FALSE )
  print( sprintf('d= %f; d_critical= %f; p_value= %f', d, d_crit, p_value) )
  invisible( list( d=d, d_crit=d_crit, p_value=p_value, df=df ) )
}


# Section 4; Question 1:
#
# Binomial model with 3 trials per observation.
source ( 'chap_10_sect_4_question_1_data.R' )
n = sum( DF$Frequency )
sum_yes = sum( DF$Number_Saying_Yes * DF$Frequency )
sum_asked = 3 * n
p = sum_yes / sum_asked
print( p )
prob = dbinom( 0:3, 3, p )
# Scale by the observed total (rather than a hard-coded count) so the
# expected counts always sum to the number of observations.
expected_frequency = n * prob
chisq_gof_report( DF$Frequency, expected_frequency, alpha=0.05, n_estimated=1 ) # one parameter estimated


# Section 4; Question 2:
#
# Poisson model; lambda is the frequency-weighted mean.
source('chap_10_sect_4_question_2_data.R')
n = sum( DF$Number_of_Years )
lambda = sum( DF$Number_of_Vacancies * DF$Number_of_Years ) / n
print( lambda )
# NOTE(review): probabilities cover only min..max of the observed values, so
# they omit the Poisson mass outside that range and the expected counts sum
# to less than n -- confirm whether the last bin should absorb the upper tail.
prob = dpois( min(DF$Number_of_Vacancies):max(DF$Number_of_Vacancies), lambda )
expected_frequency = n * prob
chisq_gof_report( DF$Number_of_Years, expected_frequency, alpha=0.01, n_estimated=1 ) # one parameter estimated


# Section 4; Question 3:
#
# Poisson model with the sparse upper bins pooled.
source('chap_10_sect_4_question_3_data.R')
n = sum( DF$Number_of_Quadrats )
lambda = sum( DF$Number_of_Infected_Plants * DF$Number_of_Quadrats ) / n
print( lambda )
# NOTE(review): as in Question 2, no tail mass beyond the observed maximum is included.
prob = dpois( min(DF$Number_of_Infected_Plants):max(DF$Number_of_Infected_Plants), lambda )
n_bins = length(prob)
expected_frequency = n * prob
print( round( expected_frequency, 2 ) ) # gives some bins with less than 5 samples
# Pool bins 7..n_bins into a single bin, for expected and observed alike.
expected_frequency = c( expected_frequency[1:6], sum( expected_frequency[7:n_bins] ) )
print( round( expected_frequency, 2 ) )
xs = c( DF$Number_of_Quadrats[1:6], sum( DF$Number_of_Quadrats[7:n_bins] ) )
chisq_gof_report( xs, expected_frequency, alpha=0.05, n_estimated=1 ) # one parameter estimated


# Section 4; Question 4:
#
# Poisson model.
source('../Chapter4/chap_4_sect_2_question_10_data.R')
n = sum( DF$No_of_Years )
lambda = sum( DF$No_of_Deaths * DF$No_of_Years ) / n
print( lambda )
# NOTE(review): as in Question 2, no tail mass beyond the observed maximum is included.
prob = dpois( min(DF$No_of_Deaths):max(DF$No_of_Deaths), lambda )
expected_frequency = n * prob
chisq_gof_report( DF$No_of_Years, expected_frequency, alpha=0.01, n_estimated=1 ) # one parameter estimated


# Section 4; Question 5:
#
# Exponential model over unit-width bins [0,1), [1,2), ...
source('chap_10_sect_4_question_5_data.R')
n = sum( DF$Number_Observed )
# To compute the estimate of lambda in the exponential distribution assume
# that each sample is from the midpoint of the bin it is found in:
#
mid_values = seq( 0.5, dim(DF)[1], by=1 )
one_over_lambda = sum( mid_values * DF$Number_Observed ) / n
lambda = 1/one_over_lambda
print( lambda )
# Probability of bin ii is pexp(ii) - pexp(ii-1); diff() of the CDF evaluated
# at the bin edges gives all of them at once.
# NOTE(review): the mass above the last bin edge is not assigned to any bin.
prob = diff( pexp( 0:dim(DF)[1], lambda ) )
expected_frequency = n * prob
print( round( expected_frequency, 2 ) )
chisq_gof_report( DF$Number_Observed, expected_frequency, alpha=0.05, n_estimated=1 ) # one parameter estimated


# Section 4; Question 6:
#
# Normal model with the mean and standard deviation given below.
m = 949.4
sd_hat = 68.4
source('../Chapter3/table_3_13_1.R')
breaks = seq( from=830, to=1100, by=50 )
print( breaks )
h = hist( DF$Average_SAT_Score, breaks, plot=FALSE )
# Compute the expected frequency counts under this binning:
#
# NOTE(review): the normal mass below the first and above the last break is
# not assigned to any bin.
prob = diff( pnorm( (breaks-m)/sd_hat ) )
n = dim(DF)[1]
expected_frequency = n * prob
print( round( expected_frequency, 3 ) )
# Two degrees of freedom are subtracted for the two normal parameters.
chisq_gof_report( h$counts, expected_frequency, alpha=0.10, n_estimated=2 )


# Section 4; Question 7:
#
# Binomial model with 2 trials per family.
source('chap_10_sect_4_question_7_data.R')
p = sum( Number_of_boys * Number_of_families ) / sum( 2 * Number_of_families )
print( p )
prob = dbinom( 0:2, 2, p )
n = sum( Number_of_families )
expected_frequency = n * prob
print( expected_frequency )
chisq_gof_report( Number_of_families, expected_frequency, alpha=0.05, n_estimated=1 ) # one parameter estimated


# Section 4; Question 8:
#
# Uniform(0,1) model over ten equal-width bins.
source('chap_10_sect_4_question_8_data.R')
breaks = seq( from=0, to=1, by=0.1 )
print( breaks )
h = hist( data, breaks, plot=FALSE )
prob = diff( breaks )
n = length( data )
expected_frequency = n * prob
print( expected_frequency )
chisq_gof_report( h$counts, expected_frequency, alpha=0.05, n_estimated=0 ) # no parameters estimated


# Section 4; Question 9:
#
# Poisson model; the last bin holds the upper tail (> 10).
source('../Chapter4/case_study_4_2_2_data.R')
lambda = 3.87 # =sum( DF$Number_Detected * DF$Frequency ) / sum( DF$Frequency )
prob = c( dpois( 0:10, lambda ), 1-ppois( 10, lambda ) )
n = sum( DF$Frequency )
expected_frequency = n * prob
print( expected_frequency )
chisq_gof_report( DF$Frequency, expected_frequency, alpha=0.05, n_estimated=1 ) # one parameter estimated


# Section 4; Question 10:
#
# Poisson model with bins 0, 1 and "2 or more".
source('../Chapter4/chap_4_sect_2_question_13_data.R')
lambda = sum( DF$Number_Countries * DF$Frequency ) / sum( DF$Frequency )
print( lambda )
prob = c( dpois( 0:1, lambda ), 1-ppois( 1, lambda ) )
n = sum( DF$Frequency )
expected_frequency = n * prob
print( expected_frequency )
# combine bins to get enough samples in each one; every row after the first
# two is pooled so no observation is dropped whatever the table length.
xs = c( DF$Frequency[1:2], sum( DF$Frequency[-(1:2)] ) )
chisq_gof_report( xs, expected_frequency, alpha=0.05, n_estimated=1 ) # one parameter estimated


# Section 4; Question 11:
#
# Geometric model with bins for the first four values and a pooled tail.
source('chap_10_sect_4_question_11_data.R')
p = 1/mean(data)
print( p )
mx = 3
prob = c( dgeom( 0:mx, p ), 1-pgeom( mx, p ) )
n = length( data )
expected_frequency = n * prob
print( expected_frequency )
# Named 'counts' rather than 'T' so the built-in TRUE alias is not masked.
counts = table(data)
# combine bins to get enough samples in each one; everything past the fourth
# distinct value is pooled.
# NOTE(review): assumes the first four table entries line up with
# dgeom(0:3, p) -- confirm against the data file.
xs = c( counts[1:4], sum( counts[-(1:4)] ) )
chisq_gof_report( xs, expected_frequency, alpha=0.05, n_estimated=1 ) # one parameter estimated


# Section 4; Question 12:
#
# Uniform model on (0, 120) over six equal-width bins.
source('chap_10_sect_4_question_12_data.R')
theta_hat = max(data)
print( theta_hat )
# NOTE(review): the bins run to a fixed 120 rather than to theta_hat --
# confirm this is intended; hist() fails if any value exceeds 120.
breaks = seq( from=0, to=120, by=20 )
print( breaks )
h = hist( data, breaks, plot=FALSE )
prob = diff( breaks )/120
# Use the actual sample size so expected counts sum to the observed total.
n = length( data )
expected_frequency = n * prob
print( expected_frequency )
chisq_gof_report( h$counts, expected_frequency, alpha=0.05, n_estimated=1 ) # one parameter estimated