# Written by:
# --
# John L. Weatherwax 2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
#-----

# Ex 1.10:
#
DF <- read.csv("../../Data/CH01/exp01-02.txt", header = TRUE, quote = "'")
stem(DF$Strength..MPa.)
# Proportion of beams with flexural strength exceeding 10 MPa
# (the mean of a logical vector is the fraction of TRUEs):
mean(DF$Strength..MPa. > 10)

# Ex 1.11:
#
DF <- read.csv("../../Data/CH01/ex01-11.txt", header = TRUE, quote = "'")
stem(DF$Scores)

# Ex 1.12:
#
vals <- c(0.31, 0.35, 0.36, 0.36, 0.37, 0.38, 0.40, 0.40, 0.40, 0.41,
          0.41, 0.42, 0.42, 0.42, 0.42, 0.42, 0.43, 0.44, 0.45, 0.46,
          0.46, 0.47, 0.48, 0.48, 0.48, 0.51, 0.54, 0.54, 0.55, 0.58,
          0.62, 0.66, 0.66, 0.67, 0.68, 0.75)
stem(vals)
# Ratio of the sample mean to the sample range:
mean(vals) / diff(range(vals))

# Ex 1.13:
#
DF <- read.csv("../../Data/CH01/ex01-13.txt", header = TRUE, quote = "'")
stem(DF$strength)
hv <- hist(DF$strength)

# Ex 1.14:
#
DF <- read.csv("../../Data/CH01/ex01-14.txt", header = TRUE, quote = "'")
stem(DF$Rate)

# Ex 1.15:
#
Creamy <- c(56, 44, 62, 36, 39, 53, 50, 65, 45, 40,
            56, 68, 41, 30, 40, 50, 56, 30, 22)
Crunchy <- c(62, 53, 75, 42, 47, 40, 34, 62, 52, 50,
             34, 42, 36, 75, 80, 47, 56, 62)
library(aplpack)
# Back-to-back stem-and-leaf comparison of the two samples:
stem.leaf.backback(Crunchy, Creamy)

# Ex 1.16 (EPage 21):
#
DF <- read.csv("../../Data/CH01/exp01-02.txt", header = TRUE, quote = "'")
beam_strength <- DF$Strength..MPa.
cylinder_strength <- c(6.1, 5.8, 7.8, 7.1, 7.2, 9.2, 6.6, 8.3, 7.0, 8.3,
                       7.8, 8.1, 7.4, 8.5, 8.9, 9.8, 9.7, 14.1, 12.6, 11.2)
library(aplpack)
# Back-to-back stem-and-leaf of beam vs. cylinder strengths:
stem.leaf.backback(beam_strength, cylinder_strength, show.no.depths = TRUE)
# Proportion of cylinders with strength exceeding 10:
print(mean(cylinder_strength > 10))

# Ex 1.17:
#
DF <- read.csv("../../Data/CH01/ex01-17.txt", header = TRUE, quote = "'")
# NOTE: named "tab" (not "T") so we do not mask the TRUE shorthand.
tab <- table(DF$Bad_Spec)
RF <- tab / sum(tab)  # relative frequencies

# Compute the needed partial sums:
p1 <- sum(RF[1:6])            # at most five
p2 <- sum(RF[1:5])            # fewer than five
p3 <- sum(RF[6:length(RF)])   # at least five
print(c(p1, p2, p3))

hist(DF$Bad_Spec,
     breaks = seq(-0.5, max(DF$Bad_Spec) + 0.5, length.out = 10),
     freq = FALSE)

# Ex 1.18
#
DF <- read.csv("../../Data/CH01/ex01-18.txt", header = TRUE, quote = "'")

# Expand the (value, frequency) table into raw observations so that we can
# call the hist function on it (vectorized rep() replaces the original
# grow-a-vector-in-a-loop construction):
obs <- rep(DF$Number.o, DF$Frequenc)
hist(obs)

# NOTE(fix): the probabilities must select rows by the paper count
# (Number.o) and weight them by their frequencies; the original code
# mistakenly tested the frequency values themselves (compare with the
# index-then-sum pattern used correctly in Ex 1.19 below).
n_total <- sum(DF$Frequenc)
p1 <- sum(DF$Frequenc[DF$Number.o >= 5]) / n_total   # at least 5 papers
p2 <- sum(DF$Frequenc[DF$Number.o >= 10]) / n_total  # at least 10 papers
p3 <- sum(DF$Frequenc[DF$Number.o > 10]) / n_total   # more than 10 papers
print(c(p1, p2, p3))

# Ex 1.19
#
DF <- read.csv("../../Data/CH01/ex01-19.txt", header = TRUE, quote = "'")
n_total <- sum(DF$Frequenc)

inds <- DF$contaminants >= 1
p <- sum(DF$Frequenc[inds]) / n_total
print(sprintf("at least one particle= %10.6f", p))

inds <- DF$contaminants >= 5
p <- sum(DF$Frequenc[inds]) / n_total
print(sprintf("at least five particles= %10.6f", p))

inds <- (DF$contaminants >= 5) & (DF$contaminants <= 10)
p <- sum(DF$Frequenc[inds]) / n_total
print(sprintf("between five and ten particles= %10.6f", p))

inds <- (DF$contaminants > 5) & (DF$contaminants < 10)
p <- sum(DF$Frequenc[inds]) / n_total
print(sprintf("strictly between five and ten particles= %10.6f", p))

# Ex 1.20
#
DF <- read.csv("../../Data/CH01/ex01-20.txt", header = TRUE, quote = "'")
stem(DF$length)
hist(DF$length, breaks = seq(0, 6000, by = 1000))
p1 <- mean(DF$length < 2000)
p2 <- mean((DF$length > 2000) & (DF$length < 4000))
print(c(p1, p2))

# Ex 1.21
#
DF <- read.csv("../../Data/CH01/ex01-21.txt", header = TRUE, quote = "'")
hist(DF$y)
p1 <- mean(DF$y == 0)
p2 <- mean(DF$y >= 1)
print(c(p1, p2))

hist(DF$z)
p1 <- mean(DF$z <= 5)
p2 <- mean(DF$z < 5)
print(c(p1, p2))

# Ex. 1.23:
#
x <- c( 86, 146, 251, 653,  98, 249, 400, 292, 131, 169,
       175, 176,  76, 264,  15, 364, 195, 262,  88, 264,
       157, 220,  42, 321, 180, 198,  38,  20,  61, 121,
       282, 224, 149, 180, 325, 250, 196,  90, 229, 166,
        38, 337,  65, 151, 341,  40,  40, 135, 597, 246,
       211, 180,  93, 315, 353, 571, 124, 279,  81, 186,
       497, 182, 423, 185, 229, 400, 338, 290, 398,  71,
       246, 185, 188, 568,  55,  55,  61, 244,  20, 284,
       393, 396, 203, 829, 239, 236, 286, 194, 277, 143,
       198, 264, 105, 203, 124, 137, 135, 350, 193, 188)
# Equal-width bins, then the textbook's unequal-width bins:
hist(x, breaks = seq(0, 900, by = 100))
hist(x, breaks = c(0, 50, 100, 150, 200, 300, 400, 500, 600, 900))
# Proportion of observations at or above 100:
mean(x >= 100)

# Ex 1.24:
#
DF <- read.csv("../../Data/CH01/ex01-24.txt", header = TRUE, quote = "'")
hist(DF$Shear_Strength..lb., breaks = seq(4000, 6000, by = 200), freq = FALSE)

# Ex 1.25:
#
DF <- read.csv("../../Data/CH01/ex01-25.txt", header = TRUE, quote = "'")
hist(DF$IDT, breaks = seq(10, 80, by = 10))           # the original data
hist(DF$log10.ID, breaks = seq(1.1, 2.0, by = 0.1))   # the transformed data

# Ex 1.26:
#
sum(c(0.177, 0.166, 0.175))
sum(c(0.078, 0.044, 0.03))
sum(c(0.175, 0.136, 0.194 / 2))

# Ex 1.27:
#
DF <- read.csv("../../Data/CH01/ex01-27.txt", header = TRUE, quote = "'")
h <- hist(DF$lifetime, breaks = seq(0, 550, by = 50))
range(log(DF$lifetime))
h <- hist(log(DF$lifetime), breaks = seq(2.0, 6.5, by = 0.5))
p1 <- mean(DF$lifetime < 100)
p2 <- mean(DF$lifetime >= 200)
print(c(p1, p2))

# Ex 1.28:
#
DF <- read.csv("../../Data/CH01/ex01-28.txt", header = TRUE, quote = "'")
hist(DF$IQ)

# Ex. 1.29:
#
# The first line of the raw file is skipped; the next supplies the header.
DF <- read.csv("../../Data/CH01/ex01-29.txt", header = TRUE, quote = "'", skip = 1)
table(DF$Complaint)                          # frequency of each complaint category
table(DF$Complaint) / length(DF$Complaint)   # relative frequencies

# Ex. 1.30:
#
complaint_counts <- c(126, 210, 67, 54, 131)
# Plot the category counts in decreasing order (Pareto-style ordering):
plot(sort(complaint_counts, decreasing = TRUE), type = "p")
grid()

# Ex. 1.31:
#
DF <- read.csv("../../Data/CH01/ex01-24.txt", header = TRUE, quote = "'")
h <- hist(DF$Shear_Strength..lb., breaks = seq(4000, 6000, by = 200), freq = FALSE)
print(cumsum(h$counts))                   # cumulative frequencies
print(cumsum(h$counts / sum(h$counts)))   # cumulative relative frequencies

# Ex. 1.32:
#
DF <- read.csv("../../Data/CH01/ex01-32.txt", header = TRUE, quote = "'")
# Relative frequencies are the first differences of the cumulative column:
relative <- diff(DF$Cumulati)
print(relative)