opt_lambda_ridge <- function(X, multiple = 1) {
  #
  # Return ridge-regression penalties lambda chosen so that the effective
  # degrees of freedom
  #
  #   df(lambda) = \sum_{j=1}^p d_j^2 / (d_j^2 + lambda)
  #
  # (Elements of Statistical Learning, eq. 3.50) take the target values
  # seq(1, p, by = 1/multiple), where d_j are the singular values of X and
  # p = ncol(X).  These lambdas give evenly spaced dof and hence
  # nice-looking ridge coefficient-profile plots.
  #
  # Input:
  #   X        = matrix or data frame of features (do NOT prepend a column
  #              of ones); must have at least 2 columns
  #   multiple = number of lambda values per unit of degrees of freedom
  #              (default 1, i.e. dof targets are the integers 1:p)
  #
  # Output:
  #   numeric vector of lambdas in decreasing order, so that element i
  #   corresponds to the i-th (increasing) dof target; the last element is
  #   0, which gives df = p exactly.
  #
  # Each dof target is solved for lambda by Newton's method, warm-started
  # at the root found for the previous (larger) target.
  #
  # Written by:
  # --
  # John L. Weatherwax 2010-02-26
  #
  # email: wax@alum.mit.edu
  #
  # Please send comments and especially bug reports to the
  # above email address.
  #
  #-----

  p <- ncol(X)
  stopifnot(is.numeric(multiple), multiple >= 1, p >= 2)

  # Squared singular values d_j^2 (svd() coerces a data frame via as.matrix).
  dj2 <- svd(X)$d^2

  # Dof targets below p, from p-1 down to 1 in steps of 1/multiple.
  # (df = p needs no solve: it corresponds exactly to lambda = 0.)
  k_targets <- seq(p - 1, 1, by = -1 / multiple)

  # Preallocate: one slot for lambda = 0 plus one per remaining target.
  lambdas <- numeric(length(k_targets) + 1)
  lambdas[1] <- 0

  root <- 0.0 # current Newton iterate; reused as warm start across targets
  for (ki in seq_along(k_targets)) {
    k <- k_targets[ki]
    # Solve df(lambda) - k = 0 by Newton iteration.  df is decreasing and
    # convex in lambda, so the iteration converges from a warm start at a
    # larger-dof (smaller-lambda) root.
    repeat {
      f <- sum(dj2 / (dj2 + root)) - k
      fp <- -sum(dj2 / (dj2 + root)^2)
      new_root <- root - f / fp
      # Relative-change stopping rule; the max(..., eps) guard avoids the
      # 0/0 -> NaN (and Inf) hazard when the current iterate is 0.
      converged <-
        abs(new_root - root) <=
        10^(-3) * max(abs(root), .Machine$double.eps)
      root <- new_root
      if (converged) break
    }
    lambdas[ki + 1] <- root
  }

  # Flip so the dof targets increase (and lambdas decrease) along the vector.
  rev(lambdas)
}