#
# Written by:
# -- 
# John L. Weatherwax                2009-04-21
# 
# email: wax@alum.mit.edu
# 
# Please send comments and especially bug reports to the
# above email address.
#
#-----

source('utils.R')

# Ex. 1.62
#
n = 4
x_bar = 76831
r1 = 4*x_bar - 76682 - 77048 # x + y = r1
r2 = 3 * 180^2 - ( (76682-x_bar)^2 + (77048-x_bar)^2 ) #
y_sols = polyroot( c( (r1-x_bar)^2 + x_bar^2 - r2, -2*r1, 2 ) )
x_sols = r1 - y_sols 


# Ex. 1.63:
#
# DF = read.csv( "../../Data/CHO1.ex01-63.txt", header=TRUE, quote="'" ) # this does not look like the data given in this exercise 
#
d = c( 6.3, 6.4, 7.7, 8.4, 8.5, 8.8, 8.9, 9.0, 9.1, 10.0, 10.1, 10.2, 10.6, 10.6,
       10.7, 10.7, 10.8, 10.9, 11.1, 11.2, 11.2, 11.4, 11.9, 11.9, 12.2, 13.1 )
boxplot( d )

x_tilde = median( d )
lower_fourth = median( d[ d<=x_tilde ] )
upper_fourth = median( d[ d>=x_tilde ] )
f_s = fourth_spread(d)

print( sprintf( "mean: %5.3f, median: %5.3f", mean(d), x_tilde ) )
print( sprintf( "lower_fourth= %5.3f, upper_fourth= %5.3f, fourth_spread= %5.3f", lower_fourth, upper_fourth, f_s ) )


# Ex. 1.64:
#
#DF = read.csv( "../../Data/CH01/ex01-64.txt", header=TRUE, quote="'" ) # this does not look like the data given in this exercise
#
DF = data.frame( emission=c( 13.8, 18.3, 32.2, 32.5, 118, 149, 232, 36 ), type=c( rep( 'HC', 4 ), rep( 'CO', 4 ) ) )

boxplot( emission ~ type, data=DF )

hc_emission = DF[ DF$type == 'HC', ]$emission
co_emission = DF[ DF$type == 'CO', ]$emission
print( sprintf("HC sd= %6.3f; CO sd= %6.3f", sd(hc_emission), sd(co_emission)) )
print( sprintf("HC sd/x_bar= %6.3f; CO sd/x_bar= %6.3f", sd(hc_emission) / mean(hc_emission) , sd(co_emission)/mean(co_emission)) )


# Ex. 1.65:
#
d = c( 6, 7, 17, 30, 43, 28, 22, 13, 3 )

p1 = sum(d[3:length(d)]) / sum(d)
p2 = sum(d[1:7]) / sum(d)
p3 = ( sum(d[1:4]) + 0.5*43 ) / sum(d)
print( c( p1, p2, p3 ) )


# Ex. 1.66:
#
DF = read.csv( "../../Data/CH01/ex01-66.txt", header=TRUE, quote="'" )

initial_DF = data.frame( concentration=c( DF$Init.Se, DF$Init.Con[1:14] ), 
             type=c( rep( 'supplement', 16 ), rep( 'control', 14 ) ) )
boxplot( concentration ~ type, data=initial_DF )

final_DF = data.frame( concentration=c( DF$Final.Se, DF$Final.Co[1:14] ), type=c( rep( 'supplement', 16 ), rep( 'control', 14 ) ) )
boxplot( concentration ~ type, data=final_DF )


# Ex. 1.67:
#
DF = read.csv( "../../Data/CH01/ex01-67.txt", header=TRUE, quote="'" )

boxplot( diameter..cm. ~ Gender, data=DF )

male   = DF[ DF['Gender'] == 'M', ]$diameter..cm.
female = DF[ DF['Gender'] == 'F', ]$diameter..cm.

print( c(mean(male), mean(male, trim=0.1)) )
print( c(mean(female), mean(female, trim=0.1)) )


# Ex. 1.70:
#
DF = read.csv( "../../Data/CH01/ex01-70.txt", header=TRUE, quote="'" )
DF_combined = data.frame( measurement=c( DF$Weight, DF$Treadmil ), type=c( rep( 'weight', length(DF$Weight) ), rep( 'treadmil', length(DF$Treadmil) ) ) )

boxplot( measurement ~ type, data=DF_combined )

d = DF$Weight - DF$Treadmil

boxplot( d )


# Ex. 1.72:
#
DF = read.csv( "../../Data/CH01/ex01-72.txt", header=TRUE, quote="'" )

boxplot( volume ~ Health_status, data=DF )


# Ex. 1.73/1.74:
#
DF = read.csv( "../../Data/CH01/ex01-73.txt", header=TRUE, quote="'" )

stem( DF$cadence )
boxplot( DF$cadence )

# Get mode of the cadence data:
#
my_mode <- function(x) {
    ux <- unique(x)
    ux[which.max(tabulate(match(x, ux)))]
}
print( my_mode(DF$cadence) )


# Ex. 1.75:
#
DF = read.csv( "../../Data/CH01/ex01-75.txt", header=TRUE, quote="'" )

boxplot( Fatigue_limit..Mpa. ~ Types.of.Wire, data=DF )

# Apply the median function in each group:
#
by( DF$Fatigue_limit..Mpa., DF$Types.of.Wire, median )


# Ex. 1.77:
#
DF = read.csv( "../../Data/CH01/ex01-77.txt", header=TRUE, quote="'" )
d = DF$pitting_depth..mm.

stem( d )

hist( d, breaks=cumsum( c( 0, 2, 2, 2, 4, 10, 10 ) ) )


# Ex 1.79
#
n = 15
x_bar_n = 12.58
s_n_2 = .512^2
x_16 = 11.8

x_bar_np1 = ( 1/(n+1) ) * ( n * x_bar_n + x_16 )
s_np1_2 = ( (n-1) * s_n_2 + (n/(n+1)) * ( ( x_16 - x_bar_n )^2 ) ) / n
print( c( x_bar_np1, sqrt(s_np1_2) ) )


# Ex 1.80
#
DF = read.csv( "../../Data/CH01/ex01-80.txt", header=TRUE, quote="'" )
d = DF$Frequenc

p1 = sum( d[1:7] ) / sum(d)
p2 = sum( d[13:15] ) / sum(d)
print( c(p1, p2) )


# Ex 1.82
#
d = c( 47, 54, 53, 50, 46, 46, 47, 50, 51, 50, 46, 52, 50, 50 )
exponential_smoothing = function(x, alpha=0.5){
    smooth = c( x[1] )
    for( ii in 2:length(x) ){
        smooth = c( smooth, alpha*x[ii] + (1-alpha)*smooth[ii-1] )
    }
    smooth
}

d_smooth_p1 = exponential_smoothing(d, alpha=0.1)
d_smooth_p5 = exponential_smoothing(d, alpha=0.5)

plot( d, type='b', col='black', pch=19 )
lines( d_smooth_p1, type='l', col='blue', pch=1 )
lines( d_smooth_p5, type='l', col='red', pch=1 )
legend( 'topright', c('raw data', '0.1 ema', '0.5 ema'), cex=1.0, lwd=2, bty='n', lty=c(1,1,1), col=c('black', 'blue', 'red'), pch=c(19, NA, NA) )


# Ex. 1.83:
#
DF = read.csv( "../../Data/CH01/ex01-83.txt", header=TRUE, quote="'" )
y = DF$rainfall..acre.feet.

n = length(y)
y_tilde = median(y)

y_sorted = sort(y)
paired_data_x = c()
paired_data_y = c()
for( ii in 1:floor(n/2) ){
    end_ii = n-(ii-1)
    paired_data_x = c( paired_data_x, y_sorted[end_ii] - y_tilde )
    paired_data_y = c( paired_data_y, y_tilde - y_sorted[ii] )
}

plot( paired_data_x, paired_data_y, col='black' )
grid()
abline( 0, 1 )