H0_linear_model = function( X, Y ){ # # Tests the hypothesis that the data comes from a linear model vs. coming from a nonlinear model # # See Exercise 14 EPage 507 from the book :Probability and Statistics: For Engineering and the Sciences by Jay L. Devore # # Written by: # -- # John L. Weatherwax 2009-04-21 # # email: wax@alum.mit.edu # # Please send comments and especially bug reports to the # above email address. # #----- uX = unique(X) c = length(uX) n = length(Y) ns = c() # holds the number of samples that have the same x value Y_bar_i = c() # Holds \bar{Y}_i (average of all Y's measured at the same value of x_i) for( ii in 1:c ){ inds = X == uX[ii] stopifnot( sum(inds)>0 ) # we need duplicate Y measurements at each value of x ns = c( ns, sum(inds) ) Y_bar_i = c( Y_bar_i, mean( Y[inds] ) ) } SSPE = sum( Y^2 ) - sum( ns * Y_bar_i^2 ) # the sum of square pure error m = lm( Y ~ X ) SSE = sum( m$residuals^2 ) SSLF = SSE - SSPE # "mean square" values: MSPE = SSPE / (n-c) MSLF = SSLF / (c-2) f_stat = MSLF / MSPE alpha = 1 - pf( f_stat, c-2, n-c ) # return the probability that our statistic is this large or larger by chance return( list( f_stat=f_stat, alpha=alpha ) ) }