opt_lambda_ridge <- function(X, multiple = 1) {
  #
  # Return ridge-regression penalties lambda chosen so that the effective
  # degrees of freedom
  #
  #   df(lambda) = \sum_{j=1}^p d_j^2 / (d_j^2 + lambda)
  #
  # (Elements of Statistical Learning, eq. 3.50) take the target values
  # seq(1, p, by = 1/multiple), where d_j are the singular values of X and
  # p = ncol(X).  These lambdas give evenly spaced dof and hence
  # nice-looking ridge coefficient-profile plots.
  #
  # Input:
  #   X        = matrix or data frame of features (do NOT prepend a column
  #              of ones); must have at least 2 columns
  #   multiple = number of lambda values per unit of degrees of freedom
  #              (default 1, i.e. dof targets are the integers 1:p)
  #
  # Output:
  #   numeric vector of lambdas in decreasing order, so that element i
  #   corresponds to the i-th (increasing) dof target; the last element is
  #   0, which gives df = p exactly.
  #
  # Each dof target is solved for lambda by Newton's method, warm-started
  # at the root found for the previous (larger) target.
  #
  # Written by:
  # --
  # John L. Weatherwax 2010-02-26
  #
  # email: wax@alum.mit.edu
  #
  # Please send comments and especially bug reports to the
  # above email address.
  #
  #-----

  p <- ncol(X)
  stopifnot(is.numeric(multiple), multiple >= 1, p >= 2)

  # Squared singular values d_j^2 (svd() coerces a data frame via as.matrix).
  dj2 <- svd(X)$d^2

  # Dof targets below p, from p-1 down to 1 in steps of 1/multiple.
  # (df = p needs no solve: it corresponds exactly to lambda = 0.)
  k_targets <- seq(p - 1, 1, by = -1 / multiple)

  # Preallocate: one slot for lambda = 0 plus one per remaining target.
  lambdas <- numeric(length(k_targets) + 1)
  lambdas[1] <- 0

  root <- 0.0 # current Newton iterate; reused as warm start across targets
  for (ki in seq_along(k_targets)) {
    k <- k_targets[ki]
    # Solve df(lambda) - k = 0 by Newton iteration.  df is decreasing and
    # convex in lambda, so the iteration converges from a warm start at a
    # larger-dof (smaller-lambda) root.
    repeat {
      f <- sum(dj2 / (dj2 + root)) - k
      fp <- -sum(dj2 / (dj2 + root)^2)
      new_root <- root - f / fp
      # Relative-change stopping rule; the max(..., eps) guard avoids the
      # 0/0 -> NaN (and Inf) hazard when the current iterate is 0.
      converged <-
        abs(new_root - root) <=
        10^(-3) * max(abs(root), .Machine$double.eps)
      root <- new_root
      if (converged) break
    }
    lambdas[ki + 1] <- root
  }

  # Flip so the dof targets increase (and lambdas decrease) along the vector.
  rev(lambdas)
}