#
# Written by:
# -- 
# John L. Weatherwax                2009-04-21
# 
# email: wax@alum.mit.edu
# 
# Please send comments and especially bug reports to the
# above email address.
#
# EPage 138
# 
#-----

if( ! require("ISLR") ){ install.packages("ISLR"); library("ISLR") }
if( ! require("leaps") ){ install.packages("leaps"); library("leaps") }
if( ! require("glmnet") ){ install.packages("glmnet"); library("glmnet") }

save_plots = F

set.seed(0)

n = 100
X = rnorm(n)
epsilon = 0.1 * rnorm(n)

beta_0 = 1.0
beta_1 = -0.1
beta_2 = +0.05
beta_3 = 0.75

Y = beta_0 + beta_1 * X + beta_2 * X^2 + beta_3 * X^3 + epsilon

DF = data.frame( Y=Y, X=X, X2=X^2, X3=X^3, X4=X^4, X5=X^5, X6=X^6, X7=X^7, X8=X^8, X9=X^9, X10=X^10 )

# Use the validation approach with regsubsets
#
train = sample(c(TRUE,FALSE), n, rep=TRUE) # will roughly assign TRUE to one-half of the data (FALSE to the other half).
test = (!train)


#--
# Apply best subset selection: 
#--
regfit.full = regsubsets( Y ~ ., data=DF[train,], nvmax=10 )
print( summary( regfit.full ) )

reg.summary = summary( regfit.full )

# Test models on the validation set:
#
test.mat = model.matrix( Y ~ ., data=DF[test,] )
val.errors = rep(NA,10)
for( ii in 1:10 ){
  coefi = coef( regfit.full, id=ii )
  pred = test.mat[,names(coefi)] %*% coefi
  val.errors[ii] = mean( ( DF$Y[test] - pred )^2 ) 
}
print( "best subset validation errors" )
print( val.errors )
k = which.min( val.errors ) 
print( k )
print( coef( regfit.full, id=k ) )

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter6/prob_8_exhaustive_summary_plots.eps", onefile=FALSE, horizontal=FALSE) }

old.par = par( mfrow=c(1,4) )
#plot( reg.summary$rss, xlab="Number of variables", ylab="RSS" )
plot( reg.summary$cp, xlab="Number of variables", ylab="Cp", pch=19 )
plot( reg.summary$bic, xlab="Number of variables", ylab="BIC", pch=19 )
plot( reg.summary$adjr2, xlab="Number of variables", ylab="adjusted R2", pch=19 )

plot( val.errors, xlab="Number of variables", ylab="Validation Errors", pch=19 )

par( mfrow=c(1,1) )

if( save_plots ){ dev.off() }


#--
# Now apply foward selection on the training set:
#--
regfit.forward = regsubsets( Y ~ ., data=DF[train,], nvmax=10, method="forward" )
print( summary( regfit.forward ) )

reg.summary = summary( regfit.forward )

# Test models on the validation set:
#
test.mat = model.matrix( Y ~ ., data=DF[test,] )
val.errors = rep(NA,10)
for( ii in 1:10 ){
  coefi = coef( regfit.forward, id=ii )
  pred = test.mat[,names(coefi)] %*% coefi
  val.errors[ii] = mean( ( DF$Y[test] - pred )^2 ) 
}
print( "forward selection validation errors" )
print( val.errors )
k = which.min( val.errors ) 
print( k )
print( coef( regfit.forward, id=k ) )

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter6/prob_8_forward_summary_plots.eps", onefile=FALSE, horizontal=FALSE) }

old.par = par( mfrow=c(1,4) )
#plot( reg.summary$rss, xlab="Number of variables", ylab="RSS" )
plot( reg.summary$cp, xlab="Number of variables", ylab="Cp", pch=19 )
plot( reg.summary$bic, xlab="Number of variables", ylab="BIC", pch=19 )
plot( reg.summary$adjr2, xlab="Number of variables", ylab="adjusted R2", pch=19 )

plot( val.errors, xlab="Number of variables", ylab="Validation Errors", pch=19 )

par( mfrow=c(1,1) )

if( save_plots ){ dev.off() }


#--
# Now apply backwards selection to the training set:
#--
regfit.backward = regsubsets( Y ~ ., data=DF[train,], nvmax=10, method="backward" )
print( summary( regfit.backward ) )

reg.summary = summary( regfit.backward )

# Test models on the validation set:
#
test.mat = model.matrix( Y ~ ., data=DF[test,] )
val.errors = rep(NA,10)
for( ii in 1:10 ){
  coefi = coef( regfit.backward, id=ii )
  pred = test.mat[,names(coefi)] %*% coefi
  val.errors[ii] = mean( ( DF$Y[test] - pred )^2 ) 
}
print( "backwards selection validation errors" )
print( val.errors )
k = which.min( val.errors ) 
print( k )
print( coef( regfit.backward, id=k ) )

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter6/prob_8_backward_summary_plots.eps", onefile=FALSE, horizontal=FALSE) }

old.par = par( mfrow=c(1,4) )
#plot( reg.summary$rss, xlab="Number of variables", ylab="RSS" )
plot( reg.summary$cp, xlab="Number of variables", ylab="Cp", pch=19 )
plot( reg.summary$bic, xlab="Number of variables", ylab="BIC", pch=19 )
plot( reg.summary$adjr2, xlab="Number of variables", ylab="adjusted R2", pch=19 )

plot( val.errors, xlab="Number of variables", ylab="Validation Errors", pch=19 )
par( mfrow=c(1,1) )

if( save_plots ){ dev.off() }


#--
# Now apply the lasso to our training set:
#--

# First fit the lasso model for all of the given lambda values :
# 
grid = 10^seq(10,-2,length=100) # a grid of lambda values 
Y = DF$Y
MM = model.matrix( Y ~ ., data=DF ) # the predictors as a datamatrix 
lasso.mod = glmnet( MM, Y, alpha=1, lambda=grid ) 
plot(lasso.mod) # plots the extracted coefficients as a function of lambda 

# Apply cross validation (to pick the best value of lambda):
# 
cv.out = cv.glmnet( MM, Y, alpha=1 )
bestlam = cv.out$lambda.1se
print( "lasso CV best value of lambda (one standard error)" )
print( bestlam )

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter6/prob_8_lasso_cv_errors.eps", onefile=FALSE, horizontal=FALSE) }
plot(cv.out)
if( save_plots ){ dev.off() }

# Extract the optimal coefficients used:
#
lasso.coef = predict( lasso.mod, type="coefficients", s=bestlam )
print( lasso.coef ) 


#
# Part (f) Try a different regression function:
#
X = rnorm(n)
epsilon = 0.1 * rnorm(n)

beta_0 = 1.0 
beta_7 = 2.5
Y = beta_0 + beta_7 * X^7 + epsilon
DF = data.frame( Y=Y, X=X, X2=X^2, X3=X^3, X4=X^4, X5=X^5, X6=X^6, X7=X^7, X8=X^8, X9=X^9, X10=X^10 )

train = sample(c(TRUE,FALSE), n, rep=TRUE) # will roughly assign TRUE to one-half of the data (FALSE to the other half).
test = (!train)

# Best subset selection:
# 
regfit.full = regsubsets( Y ~ ., data=DF[train,], nvmax=10 )
print( summary( regfit.full ) )

# Test best subset models on the validation set:
#
test.mat = model.matrix( Y ~ ., data=DF[test,] )
val.errors = rep(NA,10)
for( ii in 1:10 ){
  coefi = coef( regfit.full, id=ii )
  pred = test.mat[,names(coefi)] %*% coefi
  val.errors[ii] = mean( ( DF$Y[test] - pred )^2 ) 
}
print( "best subsets validation errors" )
print( val.errors )
k = which.min( val.errors )
print( k ) 
print( "best subsets optimal coefficients" )
print( coef( regfit.full, id=k ) ) # print the coefficients of the best model 
print( val.errors[k] ) 

# Using the lasso technique:
#

# First apply cross validation (to find the optimal value of lambda):
# 
MM = model.matrix( Y ~ ., data=DF )

cv.out = cv.glmnet( MM, Y, alpha=1 )
plot( cv.out ) 
bestlam = cv.out$lambda.1se
print( "best lambda (1 se)" )
print( bestlam ) 

# Now fit the lasso with this value of lambda: 
lasso.mod = glmnet( MM, Y, alpha=1 )

lasso.coef = predict( lasso.mod, type="coefficients", s=bestlam )
print( "lasso optimal coefficients" )
print( lasso.coef )

print( "I do not think the predict method is working correctly..." )
lasso.predict = predict( lasso.mod, s=bestlam, newx=MM )
print( "lasso MSE error" )
print( mean( ( Y - lasso.predict )^2 ) )