load_prostate_data <- function(globalScale=FALSE,trainingScale=TRUE,responseScale=FALSE){ 
  #
  # R code to load in the prostate data set from the book ESLII
  #
  # Output:
  #
  # res: list of data frames XT
  #
  # Written by:
  # -- 
  # John L. Weatherwax                2009-04-21
  # 
  # email: wax@alum.mit.edu
  # 
  # Please send comments and especially bug reports to the
  # above email address.
  # 
  #-----

  X  = read.table("../../Data/prostate.data")
  
  #
  # Based on the comments in the file prostate.info we try to scale ALL
  # features so that they have mean one and standard deviation of one.
  #
  # This would seem to violate the idea of separating training and
  # testing since the testing features would contributed to the mean and
  # variance used in scaling 
  #
  if( globalScale ){
    if( responseScale ){
      lpsa = X$lpsa - mean(X$lpsa) 
    }else{
      lpsa = X$lpsa 
    }
    train = X$train
    X$lpsa = NULL
    X$train = NULL 
    X = scale(X, TRUE, TRUE)
    Xf = data.frame(X)
    Xf$lpsa = lpsa
    Xf$train = train
    X = Xf
    rm(Xf)
    rm(lpsa)
  }

  # separate into training/testing sets
  # 
  XTraining = subset( X, train )
  XTraining$train = NULL # remove the training/testing column
  p = dim(XTraining)[2]-1
  
  XTesting  = subset( X, train==FALSE )
  XTesting$train = NULL # remove the training/testing column

  #
  # Sometime data is processed and stored in a certain order.  When doing cross validation
  # on such data sets we don't want to bias our results if we grab the first or the last samples.
  # Thus we randomize the order of the rows in the Training data frame to make sure that each
  # cross validation training/testing set is as random as possible.
  # 
  if( FALSE ){
    nSamples = dim(XTraining)[1] 
    inds = sample( 1:nSamples, nSamples )
    XTraining = XTraining[inds,]
  }

  #
  # In reality we have to estimate everything based on the training data only
  # Thus here we estimate the predictor statistics using the training set
  # and then scale the testing set by the same statistics
  # 
  if( trainingScale ){
    X = XTraining 
    if( responseScale ){
      meanLpsa = mean(X$lpsa) 
      lpsa = X$lpsa - meanLpsa 
    }else{
      lpsa = X$lpsa 
    }
    X$lpsa = NULL
    X = scale(X, TRUE, TRUE)
    means = attr(X,"scaled:center")
    stds = attr(X,"scaled:scale")
    Xf = data.frame(X)
    Xf$lpsa = lpsa
    XTraining = Xf

    # scale the testing predictors by the same amounts:
    # 
    DCVTest  = XTesting
    if( responseScale ){
      lpsaTest = DCVTest$lpsa - meanLpsa
    }else{
      lpsaTest = DCVTest$lpsa # in physical units (not mean adjusted)
    }
    DCVTest$lpsa = NULL 
    DCVTest  = t( apply( DCVTest, 1, '-', means ) ) 
    DCVTest  = t( apply( DCVTest, 1, '/', stds ) ) 
    DCVTestb = cbind( DCVTest, lpsaTest ) # append back on the response
    DCVTestf = data.frame( DCVTestb ) # a data frame containing all scaled variables of interest
    names(DCVTestf)[p+1] = "lpsa" # fix the name of the response
    XTesting = DCVTestf
  }

  return( list( XTraining, XTesting ) ) 
}