library(stats)

# Chi-square goodness-of-fit calculations for Section 3, Questions 2-11.
# Every question builds a vector of observed counts and the expected counts
# implied by the hypothesised cell probabilities, then prints the test
# statistic, the critical value and the p-value.

# Print Pearson's chi-square goodness-of-fit summary for one question.
#
# observed: observed cell counts.
# expected: expected cell counts under the null hypothesis; must have the
#           same length as `observed`.
# alpha:    significance level used for the critical value.
# fmt:      sprintf() template that receives the statistic, the critical
#           value and the p-value, in that order.
#
# The reference distribution has length(expected) - 1 degrees of freedom.
# Returns list(d, d_crit, p_value) invisibly so top-level calls print only the
# formatted line.
report_chisq_gof <- function(observed, expected, alpha,
                             fmt = "d= %f; d_critical= %f; p_value= %f") {
  # Without this guard R would silently recycle the shorter vector.
  stopifnot(length(observed) == length(expected))
  dof <- length(expected) - 1
  d <- sum((observed - expected)^2 / expected)
  d_crit <- qchisq(1 - alpha, dof)
  # Request the upper tail directly: 1 - pchisq() cancels badly for small p.
  p_value <- pchisq(d, dof, lower.tail = FALSE)
  print(sprintf(fmt, d, d_crit, p_value))
  invisible(list(d = d, d_crit = d_crit, p_value = p_value))
}

# Section 3; Question 2:
#
k0 <- 35
k1 <- 55
k2 <- 10
xs <- c(k0, k1, k2)
n <- k0 + k1 + k2
prob <- dhyper(0:2, 4, 6, 2)
expected_frequency <- n * prob
report_chisq_gof(xs, expected_frequency, alpha = 0.1)

# Section 3; Question 3:
#
# Same observed counts as Question 2, now against a binomial(2, 2/5) model.
prob <- dbinom(0:2, 2, 2 / 5)
expected_frequency <- n * prob
report_chisq_gof(xs, expected_frequency, alpha = 0.1)

# Section 3; Question 4:
#
n <- 2650
k0 <- 494     # midnight - 4AM
k1 <- n - k0  # 4AM - midnight
xs <- c(k0, k1)
prob <- c(4 / 24, 20 / 24)
expected_frequency <- n * prob
report_chisq_gof(xs, expected_frequency, alpha = 0.1)

# Section 3; Question 5:
#
# One-sided z test on the first cell of Question 4 (reuses its `xs` and `n`).
p0 <- 4 / 24
z <- (xs[1] - n * p0) / sqrt(n * p0 * (1 - p0))
z_crit <- qnorm(1 - 0.05)
p_value <- pnorm(z, lower.tail = FALSE)
print(sprintf("z= %f; z_critical= %f; p_value= %f", z, z_crit, p_value))

# Section 3; Question 6:
#
n <- 5139
k0 <- 1383
k1 <- n - k0
xs <- c(k0, k1)
prob <- c(1 / 4, 3 / 4)
expected_frequency <- n * prob
report_chisq_gof(xs, expected_frequency, alpha = 0.05)

# Section 3; Question 7:
#
source("chap_10_sect_3_question_7_data.R")  # expected to define DF

xs <- DF$Number
n <- sum(xs)
prob <- c(0.3, 0.2, 0.2, 0.1, 0.1, 0.1)
expected_frequency <- n * prob
report_chisq_gof(xs, expected_frequency, alpha = 0.05)

# Section 3; Question 8:
#
source("chap_10_sect_3_question_8_data.R")  # expected to define DF

n <- sum(DF$Number_of_Years)
k <- sum(DF$Number_of_Games)
xs <- c(k, n - k)
prob <- c(1 / 2, 1 / 2)
expected_frequency <- prob * n
report_chisq_gof(xs, expected_frequency, alpha = 0.05)

# Section 3; Question 9:
#
source("chap_10_sect_3_question_9_data.R")  # expected to define Number_of_Winners

n <- sum(Number_of_Winners)
n_spots <- length(Number_of_Winners)
prob <- rep(1 / n_spots, n_spots)  # every spot equally likely under H0
expected_frequency <- prob * n
xs <- Number_of_Winners
report_chisq_gof(xs, expected_frequency, alpha = 0.05,
                 fmt = "d = %f; d_critical= %f; p_value= %f")

# Section 3; Question 10:
#
source("chap_10_sect_3_question_10_data.R")  # expected to define data

breaks <- seq(from = 220, to = 300, by = 10)
h <- hist(data, breaks, plot = FALSE)

# Compute the expected frequency counts under this binning:
#
m <- 266
s <- 16
cdf <- pnorm(breaks, mean = m, sd = s)
# Make the two outer classes open-ended so the cell probabilities sum to 1.
# Restricting them to [220, 300] drops about 2% of the normal mass, and the
# expected counts would then not add up to n. The observed counts are
# unaffected: hist() refuses breaks that do not span the data, so no
# observation lies outside [220, 300].
cdf[1] <- 0
cdf[length(cdf)] <- 1
prob <- diff(cdf)

n <- length(data)
expected_frequency <- n * prob
print(round(expected_frequency, 3))
report_chisq_gof(h$counts, expected_frequency, alpha = 0.10)

# Section 3; Question 11:
#
prob <- c(1 / 27, 7 / 27, 19 / 27)
xs <- c(8, 16, 26)
n <- sum(xs)
expected_frequency <- n * prob
print(round(expected_frequency, 3))
report_chisq_gof(xs, expected_frequency, alpha = 0.05)