rda = function( XTrain, yTrain, XTest, yTest, alpha=1.0, gamma=1.0 ){
#
# R code to implement classification using Regularized Discriminant Analysis.
#
# See the section with the same name as this function in Chapter 4 of the book ESLII.
#
# Inputs:
#   XTrain = training data frame
#   yTrain = training labels with values 1, 2, ..., K (where K is the number of classes)
#   XTest  = testing data frame
#   yTest  = testing labels
#
# Note that
#   (alpha, gamma) = (1.0, 1.0) gives quadratic discriminant analysis
#   (alpha, gamma) = (0.0, 1.0) gives linear discriminant analysis
#
# Written by:
# --
# John L. Weatherwax 2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
#-----

# Check that our class labels are all positive:
stopifnot( all( yTrain>0 ) )
stopifnot( all( yTest>0 ) )

K = length(unique(yTrain)) # the number of classes (the classes are expected to be labeled 1, 2, ..., K)
N = dim( XTrain )[1]       # the number of samples
p = dim( XTrain )[2]       # the number of features

# Estimate \hat{sigma}^2, the scalar variance of all features pooled into one vector:
#
sigmaHat2 = var( as.vector( as.matrix( XTrain ) ) )

# Compute the class independent (pooled) covariance matrix:
#
SigmaHat = cov( XTrain )

# Compute the class priors, mean vectors, and covariance matrices:
#
PiK       = list()
MuHatK    = list()
SigmaHatK = list()
for( ci in 1:K ){
  inds             = yTrain == ci
  PiK[[ci]]        = sum(inds)/N
  MuHatK[[ci]]     = as.matrix( colMeans( XTrain[ inds, , drop=FALSE ] ) ) # dim= p x 1
  SigmaHatK[[ci]]  = cov( XTrain[ inds, , drop=FALSE ] )
}

# Blend the covariances as specified by Regularized Discriminant Analysis:
#
#   Sigma_k(alpha,gamma) = alpha Sigma_k + (1-alpha) [ gamma Sigma + (1-gamma) sigma^2 I ]
#
RDA_SigmaHatK = list()
for( ci in 1:K ){
  RDA_SigmaHatK[[ci]] = alpha * SigmaHatK[[ci]] + ( 1 - alpha ) * ( gamma * SigmaHat + ( 1 - gamma ) * sigmaHat2 * diag(p) )
}

# Compute the determinants and inverses needed by the discriminant functions:
#
RDA_SigmaHatK_Det = list()
RDA_SigmaHatK_Inv = list()
for( ci in 1:K ){
  RDA_SigmaHatK_Det[[ci]] = det( RDA_SigmaHatK[[ci]] )
  RDA_SigmaHatK_Inv[[ci]] = solve( RDA_SigmaHatK[[ci]] ) # there are numerically better ways of doing this (e.g. a Cholesky factorization) but ...
}

# Classify the training data using the Gaussian discriminant functions:
#
#   delta_k(x) = -1/2 log|Sigma_k| - 1/2 (x-mu_k)^T Sigma_k^{-1} (x-mu_k) + log(pi_k)
#
XTM     = t(as.matrix( XTrain ))            # dim= p x N
CDTrain = matrix( data=0, nrow=N, ncol=K )  # CDTrain = training class discriminants
for( ci in 1:K ){
  MU         = matrix( MuHatK[[ci]], nrow=p, ncol=N ) # each column is the class mean; dim= p x N
  X_minus_MU = XTM - MU                               # dim= p x N
  SInv       = RDA_SigmaHatK_Inv[[ci]]                # dim= p x p
  SX         = SInv %*% X_minus_MU                    # dim= p x N
  # colSums( X_minus_MU * SX ) gives the quadratic form (x-mu)^T SInv (x-mu) for each sample:
  CDTrain[,ci] = -0.5 * log(RDA_SigmaHatK_Det[[ci]]) - 0.5 * colSums( X_minus_MU * SX ) + log(PiK[[ci]])
}
yHatTrain    = apply( CDTrain, 1, which.max )
errRateTrain = sum( yHatTrain != yTrain )/N

# Classify the testing data in the same way:
#
N      = dim( XTest )[1]
XTM    = t(as.matrix( XTest ))             # dim= p x N
CDTest = matrix( data=0, nrow=N, ncol=K )  # CDTest = testing class discriminants
for( ci in 1:K ){
  MU         = matrix( MuHatK[[ci]], nrow=p, ncol=N ) # dim= p x N
  X_minus_MU = XTM - MU                               # dim= p x N
  SInv       = RDA_SigmaHatK_Inv[[ci]]                # dim= p x p
  SX         = SInv %*% X_minus_MU                    # dim= p x N
  CDTest[,ci] = -0.5 * log(RDA_SigmaHatK_Det[[ci]]) - 0.5 * colSums( X_minus_MU * SX ) + log(PiK[[ci]])
}
yHatTest    = apply( CDTest, 1, which.max )
errRateTest = sum( yHatTest != yTest )/N

return( list( yHatTrain=yHatTrain, errRateTrain=errRateTrain, yHatTest=yHatTest, errRateTest=errRateTest ) )
}
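
# A minimal usage sketch (illustrative, not part of the original file): two synthetic
# Gaussian classes in two features, scored with an (alpha, gamma) pair part way
# between LDA and QDA. The data, the make_class helper, and the parameter values
# below are assumptions made for this example; the if( FALSE ) guard keeps the demo
# from running when this file is sourced only for the rda function.
if( FALSE ){
  set.seed(0)
  n = 100
  make_class = function( mu, n ){ data.frame( f1=rnorm(n, mean=mu), f2=rnorm(n, mean=mu) ) } # hypothetical helper
  XTrain = rbind( make_class(0, n), make_class(2, n) ); yTrain = rep( c(1, 2), each=n )
  XTest  = rbind( make_class(0, n), make_class(2, n) ); yTest  = rep( c(1, 2), each=n )
  out = rda( XTrain, yTrain, XTest, yTest, alpha=0.5, gamma=1.0 )
  print( out$errRateTrain )
  print( out$errRateTest )
}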