# 
# Written by:
# -- 
# John L. Weatherwax                2009-04-21
# 
# email: wax@alum.mit.edu
# 
# Please send comments and especially bug reports to the
# above email address.
# 
#-----

#
# EPage 140
#

save_plots = T

DF = read.csv( "../../Data/ELISA.csv", header=TRUE )

# Plot Y=Optical_Density vs. X=Antigen_Concentration
if( save_plots ){ postscript ("../../WriteUp/Graphics/Chapter6/elisa_scatter_plot.eps", onefile=FALSE, horizontal=FALSE) }
plot( DF$Antigen_Concentration, DF$Optical_Density )
if( save_plots ){ dev.off() }

# Do we want to transform X in any way?
if( save_plots ){ postscript ("../../WriteUp/Graphics/Chapter6/elisa_concentration_density_plots.eps", onefile=FALSE, horizontal=FALSE) }
par(mfrow=c(1,2))
plot( density( DF$Antigen_Concentration ) )
plot( density( log(DF$Antigen_Concentration) ) )
par(mfrow=c(1,1))
if( save_plots ){ dev.off() }

# Is there a linear relationship of concentration with optical response?
#
m0 = lm( Optical_Density ~ Antigen_Concentration, data=DF )
summary(m0)

# Look at the residual plot to see if a quadratic term suggests itself
if( save_plots ){ postscript ("../../WriteUp/Graphics/Chapter6/elisa_lm0_residual_plot.eps", onefile=FALSE, horizontal=FALSE) }
plot( m0, which=1 )
if( save_plots ){ dev.off() }

DF$LConcentration = log(DF$Antigen_Concentration)
DF$Antigen_Concentration = NULL

plot( DF$LConcentration, DF$Optical_Density )

# Fit a model
m1 = lm( Optical_Density ~ LConcentration, data=DF )
summary(m1)

plot( m1, which=1 )

m2 = lm( Optical_Density ~ LConcentration + I(LConcentration^2), data=DF )
summary(m2)

print( c( summary(m0)$r.squared, summary(m1)$r.squared, summary(m2) $r.squared ) )

# Are there differences in the 11 runs? We create eleven indicator variable runid1 - runidll:
#
run_names = c()
for( ii in 1:11 ){
    mask = DF$Run_Number == ii
    name = paste0( 'runid_', sprintf('%d',ii) )
    DF[name] = 0
    DF[mask,name] = 1.
    run_names = c( run_names, name )
}

# Fit a model without including runl i.e. use run2-runll i.e. 10 offsets from some baseline.
# The intercept is the runid_1l baseline
#
indicators = paste( run_names[-1], collapse=" + " )
full_form = paste0( "Optical_Density ~ LConcentration + ", indicators )
summary( lm( full_form, data=DF ) )

# Fit a model with 11 slopes and a single intercept:
#
indicators = paste( run_names, collapse=" + LConcentration:" )
indicators = paste0( "LConcentration:", indicators )
full_form = paste0( "Optical_Density ~ ", indicators )

m_diff_slopes = lm( full_form, data=DF )
summary( m_diff_slopes )

# Build a dataframe to check that the formula we used to build model above is what we think it is:
#
lc = data.frame( LConcentration=rep(1, 11) )
st = data.frame( diag( 11 ) )
colnames(st) = run_names
DF_new = cbind( lc, st ) 
predict( m_diff_slopes, newdata=DF_new ) # this is giving me the sum of LConcentration:runid_ii + Intercept which is correct