#
# Written by:
# -- 
# John L. Weatherwax                2009-04-21
# 
# email: wax@alum.mit.edu
# 
# Please send comments and especially bug reports to the
# above email address.
#
#-----

#
# cor.prob(): matrix of pairwise correlations (lower triangle) and their p-values (upper triangle).
#
# Arguments:
#   X   - numeric matrix or data frame; correlations are computed between its columns with cor().
#   dfr - denominator degrees of freedom used for the F statistic (default: nrow(X)-2).
#
# Returns the square matrix produced by cor(X) in which every entry above the diagonal has been
# replaced by the p-value of the corresponding correlation and the diagonal has been set to NA.
#
cor.prob = function(X, dfr=nrow(X)-2){
    R = cor(X)

    # Strict upper triangle: these are the cells that get overwritten with p-values.
    above = row(R) < col(R)

    # F statistic on (1, dfr) degrees of freedom built from the squared correlation.
    r2 = R[above]^2
    Fstat = r2 * dfr/(1 - r2)

    # Upper-tail probability; lower.tail=FALSE avoids the cancellation in 1 - pf(...) for tiny p-values.
    R[above] = pf(Fstat, 1, dfr, lower.tail=FALSE)

    # The diagonal (correlation of a variable with itself) carries no information.
    R[row(R) == col(R)] = NA
    R
}

#
# Extract the pairwise correlations between the columns of X, largest (in absolute value) first.
#
# Arguments:
#   X - numeric matrix or data frame; correlations are computed between its columns with cor().
#
# Returns a data frame with one row per unordered pair of distinct columns and the columns
# first_variable, second_variable and cor, sorted by decreasing abs(cor).
#
largest_cors = function(X){
    pred_cors = cor( X )

    # cor() carries no dimnames when X is an unnamed matrix; data.frame() would then silently drop
    # the NULL label columns, so fall back to positional labels V1, V2, ...
    row_labels = rownames(pred_cors)
    if( is.null(row_labels) ){
        row_labels = paste0("V", seq_len(nrow(pred_cors)))
    }
    col_labels = colnames(pred_cors)
    if( is.null(col_labels) ){
        col_labels = paste0("V", seq_len(ncol(pred_cors)))
    }

    # Convert the matrix above into a dataframe with an entry for each element of the correlation matrix
    # (column-major order, matching as.vector(pred_cors)):
    cor_DF = data.frame( first_variable=rep(row_labels,ncol(pred_cors)),
                         second_variable=rep(col_labels,each=nrow(pred_cors)),
                         cor=as.vector(pred_cors) )

    upper_mask = as.vector(upper.tri(pred_cors)) # which elements are in the strict upper triangle (to avoid duplicates)

    # Order these pairwise correlations by their absolute correlation:
    #
    pairwise_cor = cor_DF[upper_mask,,drop=FALSE]
    pairwise_cor$abs_cor = abs(pairwise_cor$cor)
    pairwise_cor = pairwise_cor[ order( pairwise_cor$abs_cor, decreasing=TRUE ), ]
    pairwise_cor$abs_cor = NULL # helper column used only for sorting
    pairwise_cor
}