#
# EPage 214
#
# Written by:
# --
# John L. Weatherwax                2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
#-----

save_plots = F

library(plotrix)
library(DAAG)
library(MASS)  # provides lm.ridge

set.seed(0)

litters.lm.ridge = lm.ridge( brainwt ~ bodywt + lsize, data=litters )

# Split the 20 rows randomly into *3* groups:
rand = sample(1:20) %% 3 + 1
inds_1 = (1:20)[rand==1]
inds_2 = (1:20)[rand==2]
inds_3 = (1:20)[rand==3]

lambdas = seq( from=0, to=10, length.out=50 )
mse_mean_estimates = rep( 0, length(lambdas) )
mse_std_err_estimates = rep( 0, length(lambdas) )
lmbdaII = 1
for( lmbda in lambdas ){
  # For this value of lambda do *three* cross validations.
  #
  # Note: the design matrix columns must follow the coefficient order of
  # lm.ridge, i.e. (Intercept, bodywt, lsize).  Indexing litters by column
  # number (litters[,1:2]) would give (lsize, bodywt) and silently scramble
  # the predictions, so we select the columns by name.
  #
  m1 = lm.ridge( brainwt ~ bodywt + lsize, lambda=lmbda, data=litters[-inds_1,] )
  X = as.matrix( cbind( rep(1,length(inds_1)), litters[inds_1,c("bodywt","lsize")] ) )
  colnames(X) = NULL
  yhat = X %*% as.matrix( coefficients(m1) )
  y = litters[inds_1,"brainwt"]
  mse_1 = mean( (y - yhat)^2 )

  m2 = lm.ridge( brainwt ~ bodywt + lsize, lambda=lmbda, data=litters[-inds_2,] )
  X = as.matrix( cbind( rep(1,length(inds_2)), litters[inds_2,c("bodywt","lsize")] ) )
  colnames(X) = NULL
  yhat = X %*% as.matrix( coefficients(m2) )
  y = litters[inds_2,"brainwt"]
  mse_2 = mean( (y - yhat)^2 )

  m3 = lm.ridge( brainwt ~ bodywt + lsize, lambda=lmbda, data=litters[-inds_3,] )
  X = as.matrix( cbind( rep(1,length(inds_3)), litters[inds_3,c("bodywt","lsize")] ) )
  colnames(X) = NULL
  yhat = X %*% as.matrix( coefficients(m3) )
  y = litters[inds_3,"brainwt"]
  mse_3 = mean( (y - yhat)^2 )

  mse_mean_estimates[lmbdaII] = mean( c( mse_1, mse_2, mse_3 ) )
  mse_std_err_estimates[lmbdaII] = sd( c( mse_1, mse_2, mse_3 ) )/sqrt(3)
  lmbdaII = lmbdaII + 1
}

if( save_plots ){
  postscript("../../WriteUp/Graphics/Chapter6/prob_8_lambda_plot.eps", onefile=FALSE, horizontal=FALSE)
}
plotCI( x=lambdas, y=mse_mean_estimates, uiw=mse_std_err_estimates, xlab="lambda", ylab="MSE (with CI)" )
if( save_plots ){ dev.off() }

# Take the largest value of lambda and retrain using the entire dataset:
#
lambda_opt = lambdas[length(lambdas)]
ridge_model = lm.ridge( brainwt ~ bodywt + lsize, lambda=lambda_opt, data=litters )
print( coefficients(ridge_model) )

# Train using the (unpenalized) linear model:
#
m0 = lm( brainwt ~ bodywt + lsize, data=litters )
print( coefficients( m0 ) )
print( predict( m0, newdata=data.frame(bodywt=7,lsize=10), interval="confidence", level=0.95 ) )

# Part (b):
#
# Use a Monte Carlo (bootstrap-style) resampling procedure to estimate an
# interval for the prediction at (bodywt=7, lsize=10) under each model:
#
ci_of_prediction = function(nboots=1000, input_data=litters){
  n_samples = dim(input_data)[1]
  n_take = round( 3*n_samples/4 )  # train on a random 3/4 of the rows each pass

  ridge_predicted_values = rep( 0, nboots )
  lm_predicted_values = rep( 0, nboots )
  for( ii in 1:nboots ){
    rnd = sample( n_samples )[1:n_take]  # the rows to train with

    ridge_model = lm.ridge( brainwt ~ bodywt + lsize, lambda=lambda_opt, data=input_data[rnd,] )
    lm_model = lm( brainwt ~ bodywt + lsize, data=input_data[rnd,] )

    # Make predictions (ridge coefficients are ordered: Intercept, bodywt, lsize):
    ridge_predicted_values[ii] = sum( coefficients( ridge_model ) * c( 1, 7, 10 ) )
    lm_predicted_values[ii] = predict( lm_model, newdata=data.frame(bodywt=7,lsize=10) )
  }

  # 90% percentile intervals from the resampled predictions:
  ci_ridge = quantile( ridge_predicted_values, probs=c(0.05,0.95) )
  ci_lm = quantile( lm_predicted_values, probs=c(0.05,0.95) )

  c( mean( ridge_predicted_values ), ci_ridge, mean( lm_predicted_values ), ci_lm )
}
print( ci_of_prediction() )
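
# As a cross-check on the cross-validation curve above, MASS's lm.ridge can
# be fit over the whole lambda grid at once, and select() reports the
# modified HKB, modified L-W, and GCV choices of lambda.  A minimal sketch
# (this block is an addition, not part of the original solution; m_all and
# lambda_gcv are names introduced here for illustration):
#
m_all = lm.ridge( brainwt ~ bodywt + lsize, data=litters, lambda=lambdas )
select( m_all )  # prints the HKB, L-W, and GCV estimates of lambda
lambda_gcv = m_all$lambda[ which.min( m_all$GCV ) ]  # lambda minimizing generalized cross validation
print( lambda_gcv )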