# # EPage 493 # # Written by: # -- # John L. Weatherwax 2009-04-21 # # email: wax@alum.mit.edu # # Please send comments and especially bug reports to the # above email address. # #----- # Ex 12.58: # DF = read.csv( "../../Data/CH12/ex12-58.txt", header=TRUE, quote="'" ) plot( DF$TOST, DF$RBOT ) r = cor( DF$TOST, DF$RBOT ) r = cor( DF$RBOT, DF$TOST ) m = lm( RBOT ~ TOST, data=DF ) summary(m) # Ex 12.59: # DF = read.csv( "../../Data/CH12/ex12-59.txt", header=TRUE, quote="'" ) plot( DF$x, DF$y, xlab='shear force', ylab='percent fiber dry weight' ) r = cor( DF$x, DF$y ) print( r^2 ) n = length(DF$x) t = r * sqrt( n-2 ) / sqrt( 1 - r^2 ) t_value = qt( 1-0.01, n-2 ) # Ex 12.60: # DF = read.csv( "../../Data/CH12/ex12-60.txt", header=TRUE, quote="'" ) plot( DF$Cl, DF$Co ) r = cor( DF$Cl, DF$Co ) n = length(DF$Cl) t = r * sqrt( n-2 ) / sqrt( 1 - r^2 ) p_value = 1 - pt( t, n-2 ) # Ex 12.61: # DF = read.csv( "../../Data/CH12/ex12-61.txt", header=TRUE, quote="'" ) plot( DF$x, DF$y ) print( sum( DF$x^2 ) - sum( DF$x )^2 / length(DF$x) ) # check if this matches the books S_xx values r = cor( DF$x, DF$y ) print( r^2 ) # Lets test that R^2 does not change depending on what variable is x and what variable is y: summary( lm( y~x, data=DF ) )$r.squared summary( lm( x~y, data=DF ) )$r.squared # Ex 12.62: # DF = read.csv( "../../Data/CH12/ex12-62.txt", header=TRUE, quote="'" ) plot( DF$x, DF$y ) n = length( DF$x ) r = 0.449 t = r * sqrt( n-2 ) / sqrt( 1 - r^2 ) p_value = 1 - pt( t, n-2 ) # Ex 12.63: # DF = read.csv( "../../Data/CH12/ex12-63.txt", header=TRUE, quote="'" ) plot( DF$x, DF$y ) n = length( DF$x ) r = cor( DF$x, DF$y ) t = r * sqrt( n-2 ) / sqrt( 1 - r^2 ) p_value = 1 - pt( t, n-2 ) t_value = qt( 1 - 0.05, n-2 ) # Ex 12.64: # n = 26 sum_x = 1613 S_xx = 3756.96 sum_y = 281.9 S_yy = 465.34 sum_xy = 16731 S_xy = sum_xy - sum_x * sum_y / n r = S_xy / ( sqrt( S_xx ) * sqrt( S_yy ) ) nu = 0.5 * log( (1+r)/(1-r) ) alpha = 0.1 z_value = qnorm( 1 - 0.5 * alpha ) cs = nu + c(-1,+1) * z_value / sqrt(n-3) rho_ci = ( exp( 2 * cs ) - 1 ) / ( exp( 2 * cs ) + 1 ) # Test rho=-0.5 vs rho < -0.5: # v = 0.5 * log( (1+r)/(1-r) ) rho_0 = -0.5 z = (v - 0.5 * log( (1+rho_0)/(1-rho_0) ) ) / (1/sqrt( n-3 )) print(z) alpha = 0.05 z_value = qnorm( 1 - 0.5 * alpha ) print(r^2) rho_ci^2 # Ex 12.65: # DF = read.csv( "../../Data/CH12/ex12-65.txt", header=TRUE, quote="'" ) n = length( DF$x ) # compute the confidence interval of rho: r = cor( DF$x, DF$y ) nu = 0.5 * log( (1+r)/(1-r) ) alpha = 0.01 z_value = qnorm( 1 - 0.5 * alpha ) cs = nu + c(-1,+1) * z_value / sqrt(n-3) rho_ci = ( exp( 2 * cs ) - 1 ) / ( exp( 2 * cs ) + 1 ) # Ex 12.66: # DF = read.csv( "../../Data/CH01/ex01-82.txt", header=TRUE, quote="'" ) n = length( DF$x ) # Ex 12.67: # n = 500 p_value = 0.00032 t_value = -qt( p_value / 2, n-2 ) root_fn = function(r){ r * sqrt(n-2) / sqrt( 1 - r^2 ) - t_value } uniroot( root_fn, c(-0.9,+0.9) ) n = 10000 r = 0.022 t = r * sqrt( n-2 ) / sqrt( 1 - r^2 ) p_value = 1 - pt( t, n-2 )