#
# Written by:
# --
# John L. Weatherwax                2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
#-----

# cor.prob(): matrix of pairwise correlations and their p-values.
#
# Arguments:
#   X    numeric matrix or data frame; one variable per column.
#   dfr  residual degrees of freedom used for the significance test
#        (defaults to nrow(X) - 2).
#
# Returns:
#   A square matrix with the correlations below the diagonal, the p-values of
#   the corresponding F-tests (1 and dfr degrees of freedom) above the
#   diagonal, and NA on the diagonal.
#
cor.prob <- function(X, dfr = nrow(X) - 2) {
  R <- cor(X)

  # Strict upper triangle: the cells that are overwritten with p-values.
  above <- row(R) < col(R)

  r2 <- R[above]^2
  Fstat <- r2 * dfr / (1 - r2)

  # Ask pf() for the upper tail directly; computing 1 - pf(...) cancels to
  # exactly 0 for very strong correlations.
  R[above] <- pf(Fstat, 1, dfr, lower.tail = FALSE)

  diag(R) <- NA
  R
}

# largest_cors(): pairwise correlations sorted by decreasing absolute value.
#
# Arguments:
#   X  numeric matrix or data frame; one variable per column.
#
# Returns:
#   A data frame with one row per unordered pair of variables and the columns
#   first_variable, second_variable and cor, ordered from the largest to the
#   smallest absolute correlation.
#
largest_cors <- function(X) {
  pred_cors <- cor(X)
  n_vars <- ncol(pred_cors)

  # cor() returns no dimnames for an unnamed matrix; fall back to generated
  # labels so the name columns of the result always exist.
  var_names <- colnames(pred_cors)
  if (is.null(var_names)) {
    var_names <- paste0("V", seq_len(n_vars))
  }

  # One row per cell of the correlation matrix, in column-major order so the
  # rows line up with as.vector(pred_cors).
  cor_DF <- data.frame(
    first_variable = rep(var_names, times = n_vars),
    second_variable = rep(var_names, each = n_vars),
    cor = as.vector(pred_cors)
  )

  # Keep only the strict upper triangle so each pair appears exactly once.
  upper_mask <- as.vector(upper.tri(pred_cors))
  pairwise_cor <- cor_DF[upper_mask, , drop = FALSE]

  ranking <- order(abs(pairwise_cor$cor), decreasing = TRUE)
  pairwise_cor[ranking, , drop = FALSE]
}