#
# EPage 214
#
# Written by:
# --
# John L. Weatherwax                2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
#-----

save_plots = F

library(plotrix)
library(DAAG)
library(MASS)  # provides lm.ridge

set.seed(0)

litters.lm.ridge = lm.ridge( brainwt ~ bodywt + lsize, data=litters )

# Split the 20 rows randomly into *3* groups:
rand = sample(1:20) %% 3 + 1
inds_1 = (1:20)[rand==1]
inds_2 = (1:20)[rand==2]
inds_3 = (1:20)[rand==3]

lambdas = seq( from=0, to=10, length.out=50 )
mse_mean_estimates = rep( 0, length(lambdas) )
mse_std_err_estimates = rep( 0, length(lambdas) )
lmbdaII = 1
for( lmbda in lambdas ){
  # For this value of lambda do *three* cross validations.
  #
  # Note: the design matrix columns must follow the coefficient order of
  # lm.ridge, i.e. (Intercept, bodywt, lsize).  Indexing litters by column
  # number (litters[,1:2]) would give (lsize, bodywt) and silently scramble
  # the predictions, so we select the columns by name.
  #
  m1 = lm.ridge( brainwt ~ bodywt + lsize, lambda=lmbda, data=litters[-inds_1,] )
  X = as.matrix( cbind( rep(1,length(inds_1)), litters[inds_1,c("bodywt","lsize")] ) )
  colnames(X) = NULL
  yhat = X %*% as.matrix( coefficients(m1) )
  y = litters[inds_1,"brainwt"]
  mse_1 = mean( (y - yhat)^2 )

  m2 = lm.ridge( brainwt ~ bodywt + lsize, lambda=lmbda, data=litters[-inds_2,] )
  X = as.matrix( cbind( rep(1,length(inds_2)), litters[inds_2,c("bodywt","lsize")] ) )
  colnames(X) = NULL
  yhat = X %*% as.matrix( coefficients(m2) )
  y = litters[inds_2,"brainwt"]
  mse_2 = mean( (y - yhat)^2 )

  m3 = lm.ridge( brainwt ~ bodywt + lsize, lambda=lmbda, data=litters[-inds_3,] )
  X = as.matrix( cbind( rep(1,length(inds_3)), litters[inds_3,c("bodywt","lsize")] ) )
  colnames(X) = NULL
  yhat = X %*% as.matrix( coefficients(m3) )
  y = litters[inds_3,"brainwt"]
  mse_3 = mean( (y - yhat)^2 )

  mse_mean_estimates[lmbdaII] = mean( c( mse_1, mse_2, mse_3 ) )
  mse_std_err_estimates[lmbdaII] = sd( c( mse_1, mse_2, mse_3 ) )/sqrt(3)
  lmbdaII = lmbdaII + 1
}

if( save_plots ){
  postscript("../../WriteUp/Graphics/Chapter6/prob_8_lambda_plot.eps", onefile=FALSE, horizontal=FALSE)
}
plotCI( x=lambdas, y=mse_mean_estimates, uiw=mse_std_err_estimates, xlab="lambda", ylab="MSE (with CI)" )
if( save_plots ){ dev.off() }

# Take the largest value of lambda and retrain using the entire dataset:
#
lambda_opt = lambdas[length(lambdas)]
ridge_model = lm.ridge( brainwt ~ bodywt + lsize, lambda=lambda_opt, data=litters )
print( coefficients(ridge_model) )

# Train using the (unpenalized) linear model:
#
m0 = lm( brainwt ~ bodywt + lsize, data=litters )
print( coefficients( m0 ) )
print( predict( m0, newdata=data.frame(bodywt=7,lsize=10), interval="confidence", level=0.95 ) )

# Part (b):
#
# Use a Monte Carlo (bootstrap-style) resampling procedure to estimate an
# interval for the prediction at (bodywt=7, lsize=10) under each model:
#
ci_of_prediction = function(nboots=1000, input_data=litters){
  n_samples = dim(input_data)[1]
  n_take = round( 3*n_samples/4 )  # train on a random 3/4 of the rows each pass

  ridge_predicted_values = rep( 0, nboots )
  lm_predicted_values = rep( 0, nboots )
  for( ii in 1:nboots ){
    rnd = sample( n_samples )[1:n_take]  # the rows to train with

    ridge_model = lm.ridge( brainwt ~ bodywt + lsize, lambda=lambda_opt, data=input_data[rnd,] )
    lm_model = lm( brainwt ~ bodywt + lsize, data=input_data[rnd,] )

    # Make predictions (ridge coefficients are ordered: Intercept, bodywt, lsize):
    ridge_predicted_values[ii] = sum( coefficients( ridge_model ) * c( 1, 7, 10 ) )
    lm_predicted_values[ii] = predict( lm_model, newdata=data.frame(bodywt=7,lsize=10) )
  }

  # 90% percentile intervals from the resampled predictions:
  ci_ridge = quantile( ridge_predicted_values, probs=c(0.05,0.95) )
  ci_lm = quantile( lm_predicted_values, probs=c(0.05,0.95) )

  c( mean( ridge_predicted_values ), ci_ridge, mean( lm_predicted_values ), ci_lm )
}
print( ci_of_prediction() )
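
# As a cross-check on the cross-validation curve above, MASS's lm.ridge can
# be fit over the whole lambda grid at once, and select() reports the
# modified HKB, modified L-W, and GCV choices of lambda.  A minimal sketch
# (this block is an addition, not part of the original solution; m_all and
# lambda_gcv are names introduced here for illustration):
#
m_all = lm.ridge( brainwt ~ bodywt + lsize, data=litters, lambda=lambdas )
select( m_all )  # prints the HKB, L-W, and GCV estimates of lambda
lambda_gcv = m_all$lambda[ which.min( m_all$GCV ) ]  # lambda minimizing generalized cross validation
print( lambda_gcv )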