# Written by:
# --
# John L. Weatherwax 2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
#-----

# Ex 1.10:
#
DF <- read.csv("../../Data/CH01/exp01-02.txt", header = TRUE, quote = "'")
stem(DF$Strength..MPa.)
# Proportion of beams with flexural strength exceeding 10 MPa
# (the mean of a logical vector is the fraction of TRUEs):
mean(DF$Strength..MPa. > 10)

# Ex 1.11:
#
DF <- read.csv("../../Data/CH01/ex01-11.txt", header = TRUE, quote = "'")
stem(DF$Scores)

# Ex 1.12:
#
vals <- c(0.31, 0.35, 0.36, 0.36, 0.37, 0.38, 0.40, 0.40, 0.40, 0.41,
          0.41, 0.42, 0.42, 0.42, 0.42, 0.42, 0.43, 0.44, 0.45, 0.46,
          0.46, 0.47, 0.48, 0.48, 0.48, 0.51, 0.54, 0.54, 0.55, 0.58,
          0.62, 0.66, 0.66, 0.67, 0.68, 0.75)
stem(vals)
# Ratio of the sample mean to the sample range:
mean(vals) / diff(range(vals))

# Ex 1.13:
#
DF <- read.csv("../../Data/CH01/ex01-13.txt", header = TRUE, quote = "'")
stem(DF$strength)
hv <- hist(DF$strength)

# Ex 1.14:
#
DF <- read.csv("../../Data/CH01/ex01-14.txt", header = TRUE, quote = "'")
stem(DF$Rate)

# Ex 1.15:
#
Creamy <- c(56, 44, 62, 36, 39, 53, 50, 65, 45, 40,
            56, 68, 41, 30, 40, 50, 56, 30, 22)
Crunchy <- c(62, 53, 75, 42, 47, 40, 34, 62, 52, 50,
             34, 42, 36, 75, 80, 47, 56, 62)
library(aplpack)
# Back-to-back stem-and-leaf comparison of the two samples:
stem.leaf.backback(Crunchy, Creamy)

# Ex 1.16 (EPage 21):
#
DF <- read.csv("../../Data/CH01/exp01-02.txt", header = TRUE, quote = "'")
beam_strength <- DF$Strength..MPa.
cylinder_strength <- c(6.1, 5.8, 7.8, 7.1, 7.2, 9.2, 6.6, 8.3, 7.0, 8.3,
                       7.8, 8.1, 7.4, 8.5, 8.9, 9.8, 9.7, 14.1, 12.6, 11.2)
library(aplpack)
# Back-to-back stem-and-leaf of beam vs. cylinder strengths:
stem.leaf.backback(beam_strength, cylinder_strength, show.no.depths = TRUE)
# Proportion of cylinders with strength exceeding 10:
print(mean(cylinder_strength > 10))

# Ex 1.17:
#
DF <- read.csv("../../Data/CH01/ex01-17.txt", header = TRUE, quote = "'")
# NOTE: named "tab" (not "T") so we do not mask the TRUE shorthand.
tab <- table(DF$Bad_Spec)
RF <- tab / sum(tab)  # relative frequencies

# Compute the needed partial sums:
p1 <- sum(RF[1:6])            # at most five
p2 <- sum(RF[1:5])            # fewer than five
p3 <- sum(RF[6:length(RF)])   # at least five
print(c(p1, p2, p3))

hist(DF$Bad_Spec,
     breaks = seq(-0.5, max(DF$Bad_Spec) + 0.5, length.out = 10),
     freq = FALSE)

# Ex 1.18
#
DF <- read.csv("../../Data/CH01/ex01-18.txt", header = TRUE, quote = "'")

# Expand the (value, frequency) table into raw observations so that we can
# call the hist function on it (vectorized rep() replaces the original
# grow-a-vector-in-a-loop construction):
obs <- rep(DF$Number.o, DF$Frequenc)
hist(obs)

# NOTE(fix): the probabilities must select rows by the paper count
# (Number.o) and weight them by their frequencies; the original code
# mistakenly tested the frequency values themselves (compare with the
# index-then-sum pattern used correctly in Ex 1.19 below).
n_total <- sum(DF$Frequenc)
p1 <- sum(DF$Frequenc[DF$Number.o >= 5]) / n_total   # at least 5 papers
p2 <- sum(DF$Frequenc[DF$Number.o >= 10]) / n_total  # at least 10 papers
p3 <- sum(DF$Frequenc[DF$Number.o > 10]) / n_total   # more than 10 papers
print(c(p1, p2, p3))

# Ex 1.19
#
DF <- read.csv("../../Data/CH01/ex01-19.txt", header = TRUE, quote = "'")
n_total <- sum(DF$Frequenc)

inds <- DF$contaminants >= 1
p <- sum(DF$Frequenc[inds]) / n_total
print(sprintf("at least one particle= %10.6f", p))

inds <- DF$contaminants >= 5
p <- sum(DF$Frequenc[inds]) / n_total
print(sprintf("at least five particles= %10.6f", p))

inds <- (DF$contaminants >= 5) & (DF$contaminants <= 10)
p <- sum(DF$Frequenc[inds]) / n_total
print(sprintf("between five and ten particles= %10.6f", p))

inds <- (DF$contaminants > 5) & (DF$contaminants < 10)
p <- sum(DF$Frequenc[inds]) / n_total
print(sprintf("strictly between five and ten particles= %10.6f", p))

# Ex 1.20
#
DF <- read.csv("../../Data/CH01/ex01-20.txt", header = TRUE, quote = "'")
stem(DF$length)
hist(DF$length, breaks = seq(0, 6000, by = 1000))
p1 <- mean(DF$length < 2000)
p2 <- mean((DF$length > 2000) & (DF$length < 4000))
print(c(p1, p2))

# Ex 1.21
#
DF <- read.csv("../../Data/CH01/ex01-21.txt", header = TRUE, quote = "'")
hist(DF$y)
p1 <- mean(DF$y == 0)
p2 <- mean(DF$y >= 1)
print(c(p1, p2))

hist(DF$z)
p1 <- mean(DF$z <= 5)
p2 <- mean(DF$z < 5)
print(c(p1, p2))

# Ex. 1.23:
#
x <- c( 86, 146, 251, 653,  98, 249, 400, 292, 131, 169,
       175, 176,  76, 264,  15, 364, 195, 262,  88, 264,
       157, 220,  42, 321, 180, 198,  38,  20,  61, 121,
       282, 224, 149, 180, 325, 250, 196,  90, 229, 166,
        38, 337,  65, 151, 341,  40,  40, 135, 597, 246,
       211, 180,  93, 315, 353, 571, 124, 279,  81, 186,
       497, 182, 423, 185, 229, 400, 338, 290, 398,  71,
       246, 185, 188, 568,  55,  55,  61, 244,  20, 284,
       393, 396, 203, 829, 239, 236, 286, 194, 277, 143,
       198, 264, 105, 203, 124, 137, 135, 350, 193, 188)
# Equal-width bins, then the textbook's unequal-width bins:
hist(x, breaks = seq(0, 900, by = 100))
hist(x, breaks = c(0, 50, 100, 150, 200, 300, 400, 500, 600, 900))
# Proportion of observations at or above 100:
mean(x >= 100)

# Ex 1.24:
#
DF <- read.csv("../../Data/CH01/ex01-24.txt", header = TRUE, quote = "'")
hist(DF$Shear_Strength..lb., breaks = seq(4000, 6000, by = 200), freq = FALSE)

# Ex 1.25:
#
DF <- read.csv("../../Data/CH01/ex01-25.txt", header = TRUE, quote = "'")
hist(DF$IDT, breaks = seq(10, 80, by = 10))           # the original data
hist(DF$log10.ID, breaks = seq(1.1, 2.0, by = 0.1))   # the transformed data

# Ex 1.26:
#
sum(c(0.177, 0.166, 0.175))
sum(c(0.078, 0.044, 0.03))
sum(c(0.175, 0.136, 0.194 / 2))

# Ex 1.27:
#
DF <- read.csv("../../Data/CH01/ex01-27.txt", header = TRUE, quote = "'")
h <- hist(DF$lifetime, breaks = seq(0, 550, by = 50))
range(log(DF$lifetime))
h <- hist(log(DF$lifetime), breaks = seq(2.0, 6.5, by = 0.5))
p1 <- mean(DF$lifetime < 100)
p2 <- mean(DF$lifetime >= 200)
print(c(p1, p2))

# Ex 1.28:
#
DF <- read.csv("../../Data/CH01/ex01-28.txt", header = TRUE, quote = "'")
hist(DF$IQ)

# Ex. 1.29:
#
# The first line of the raw file is skipped; the next supplies the header.
DF <- read.csv("../../Data/CH01/ex01-29.txt", header = TRUE, quote = "'", skip = 1)
table(DF$Complaint)                          # frequency of each complaint category
table(DF$Complaint) / length(DF$Complaint)   # relative frequencies

# Ex. 1.30:
#
complaint_counts <- c(126, 210, 67, 54, 131)
# Plot the category counts in decreasing order (Pareto-style ordering):
plot(sort(complaint_counts, decreasing = TRUE), type = "p")
grid()

# Ex. 1.31:
#
DF <- read.csv("../../Data/CH01/ex01-24.txt", header = TRUE, quote = "'")
h <- hist(DF$Shear_Strength..lb., breaks = seq(4000, 6000, by = 200), freq = FALSE)
print(cumsum(h$counts))                   # cumulative frequencies
print(cumsum(h$counts / sum(h$counts)))   # cumulative relative frequencies

# Ex. 1.32:
#
DF <- read.csv("../../Data/CH01/ex01-32.txt", header = TRUE, quote = "'")
# Relative frequencies are the first differences of the cumulative column:
relative <- diff(DF$Cumulati)
print(relative)