source('utils.R')
source('../Chapter3/load_prostate_data.R')

## Cross-validation results:
##
##source('../Chapter3/dup_OSE_all_subsets.R')

# Read in the prostate data with all three scaling flags switched off
# (i.e. the unscaled data).
PD <- load_prostate_data(
  globalScale = FALSE,
  trainingScale = FALSE,
  responseScale = FALSE
)
XTraining <- PD[[1]]  # first element of the returned list: training set
XTesting <- PD[[2]]   # second element of the returned list: testing set

# Number of predictors: every column but the last, which holds the response.
pp <- ncol(XTraining) - 1
# Number of training observations (used below for the BIC penalty).
nSamples <- nrow(XTraining)

# Least-squares fit of lpsa on every other column of the training set.
full_model <- lm(lpsa ~ ., data = XTraining)

## AIC stepwise selection, starting from the full model:
## the penalty multiplier per degree of freedom is k = 2.
##
step(object = full_model, k = 2)

## BIC stepwise selection, starting from the full model:
## the penalty multiplier per degree of freedom is k = log(number of samples).
##
step(object = full_model, k = log(nSamples))

## Estimate the prediction error of the two-predictor model
## (lcavol, lweight -> lpsa) using the "0.632 method" with B = 500.
##
err <- estimate_632_Err(XTraining, c('lcavol', 'lweight'), 'lpsa', B = 500)

## For comparison: fit the same two-predictor model on the training set and
## compute its mean squared error on the testing set.
mk <- lm(lpsa ~ lcavol + lweight, data = XTraining)
y_hat <- predict(mk, newdata = XTesting)
mse_test <- mean((XTesting[, 'lpsa'] - y_hat)^2)

## Report the pieces of the 0.632 estimate next to the test-set MSE.
print(
  sprintf(
    'B= %d; err_bar= %.2f; err_1= %.2f; (0.632 estimate)= %.2f; mse_test= %.2f',
    err$B,
    err$err_bar,
    err$err_1,
    err$estimate,
    mse_test
  )
)