source('../../Data/data_loaders.R')

DF = load_appendix_cement_data()

null = lm( Y ~ 1, data=DF )
full = lm( Y ~ ., data=DF )

# Forward selection:
#
step( null, scope=list(lower=null, upper=full), direction="forward" )

# Backward selection:
#
step( full, direction="backward" )

# Both directions:
#
step( null, scope=list(upper=full), direction="both" )

# Now try the leaps package:
#
if( ! require("leaps") ){
  install.packages("leaps")
  library("leaps")
}

rss = regsubsets( Y ~ ., data=DF )
print( summary( rss ) )

# Now use the routine cv_all_subsets.R:
#
# http://www.waxworksmath.com/Authors/G_M/Hastie/Code/Chapter3/cv_all_subsets.R
#
# Code here is modeled after the code found at:
#
# http://www.waxworksmath.com/Authors/G_M/Hastie/Code/Chapter3/dup_OSE_all_subsets.R
#
source('../../../../G_M/Hastie/Code/Chapter3/cv_all_subsets.R')
source('../../../../G_M/Hastie/Code/Chapter3/one_standard_error_rule.R')

XTraining = DF

p        = dim(XTraining)[2] - 1 # the last column is the response
nSamples = dim(XTraining)[1]

# Do best-subset cross validation:
#
for( k in 0:p ){
  print(sprintf('Looking for optimal subsets with %d predictors', k))
  res = cv_all_subsets( k, XTraining, numberOfCV=2 )
  if( k==0 ){
    complexityParam      = k
    cvResults            = res[[1]]
    bestPredictorFormula = res[[3]]
  }else{
    complexityParam      = cbind( complexityParam, k )
    cvResults            = cbind( cvResults, res[[1]] ) # each column is a different value of the complexity parameter ...
                                                        # each row is a different cross validation sample ...
    bestPredictorFormula = c( bestPredictorFormula, res[[3]] )
  }
}

# Group all cvResults by k (the subset size) and compute statistics:
#
OSE = one_standard_error_rule( complexityParam, cvResults )

complexVValue = OSE[[1]]
complexIndex  = OSE[[2]]
means         = OSE[[4]]
stds          = OSE[[5]]
oseHValue     = OSE[[3]]

if( ! require("gplots") ){ # plotCI, plotmeans found here
  install.packages("gplots")
  library("gplots")
}

#postscript("../../WriteUp/Graphics/Chapter4/ex_4_14_ci_plot.eps", onefile=FALSE, horizontal=FALSE)

plotCI( x=0:p, y=means, uiw=0.5*stds, col="black", barcol="blue", lwd=1, type="l",
        xlab="subset size (p)", ylab="expected squared prediction error (ESPE)" )
abline( h=oseHValue, lty=2 )
abline( v=complexVValue, lty=2 )

#dev.off()
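
# For reference, a minimal sketch (a hypothetical helper, *not* the sourced
# one_standard_error_rule.R routine) of what the one-standard-error rule above
# computes: for each subset size take the mean and spread of the CV fold
# errors, locate the size with the smallest mean error, and then choose the
# simplest model whose mean error lies within one standard error of that
# minimum.  Whether the sourced routine uses the standard deviation or the
# standard error of the fold errors is an assumption here.
#
one_se_rule_sketch = function( ks, cvErrors ){ # cvErrors: rows = CV folds; columns = subset sizes ks
  mns = apply( cvErrors, 2, mean )
  ses = apply( cvErrors, 2, sd ) / sqrt( nrow(cvErrors) ) # standard error of each column
  iMin      = which.min( mns )                 # subset size with the smallest estimated error
  threshold = mns[iMin] + ses[iMin]            # one standard error above that minimum
  iChosen   = min( which( mns <= threshold ) ) # simplest model under the threshold (ks assumed increasing)
  list( chosenK=ks[iChosen], chosenIndex=iChosen, threshold=threshold, means=mns, stds=ses )
}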