library(stats)

kruskal_wallis_test = function( DF, treatment_key, response_key, alpha=0.05, check_sample_sizes=TRUE, debugging=FALSE ){
  #
  # Returns:
  #   K: the Kruskal Wallis statistic
  # 
  # See Page 618 from the book Probability and Statistics: For Engineering and the Sciences by Jay L. Devore
  #
  # Written by:
  # -- 
  # John L. Weatherwax                2009-04-21
  # 
  # email: wax@alum.mit.edu
  # 
  # Please send comments and especially bug reports to the
  # above email address.
  #
  #-----

  # Get the unique labels of the treatments: 
  treatment_key_column_index = which( colnames(DF) == treatment_key )
  treatment_names = unique( DF[[treatment_key_column_index]] )
 
  response_key_column_index = which( colnames(DF) == response_key )

  N = nrow(DF)
  I = length(treatment_names)
  if( debugging ){
    print( sprintf( "N= %10d", N ) )
    print( sprintf( "I= %10d", I ) )
  }

  # Determine how many measurements J_i are in each treatment type:
  # 
  Ji = c()
  for( ii in 1:I ){
    inds = DF[[treatment_key_column_index]]==treatment_names[ii]
    ni = sum( inds )
    Ji = c( Ji, ni )
  }
  if( debugging ){ 
    print( "Ji= " ); print( Ji ) 
  }

  # Check that conditions are correct for K to be approximatly chi2:
  #
  if( check_sample_sizes ){
    if( I==3 ){
      any_less_than_six = sum( Ji < 6 ) > 1  
      stopifnot( !any_less_than_six )
    }else{
      any_less_than_five = sum( Ji < 5 ) > 1
      stopifnot( !any_less_than_five )
    } 
  }

  # Determine the values of R_i (the sum of the response ranks under treatment i):
  # 
  Ri = c()
  data_rank = rank( DF[[response_key_column_index]] )
  for( ii in 1:I ){
    inds = DF[[treatment_key_column_index]]==treatment_names[ii]
    ri = sum( data_rank[inds] ) 
    Ri = c( Ri, ri )
  }
  if( debugging ){ 
    print( "Ri= " ); print( Ri ) 
  }

  K = ( 12 / ( N*(N+1) ) ) * sum( Ri^2 / Ji ) - 3 * (N+1) 

  k_crit = qchisq( 1 - alpha, I-1 ) 

  # Compute the P-value for this test:
  #
  p_value = 1 - pchisq( K, I-1 )

  list( Ji=Ji, Ri=Ri, K=K, k_crit=k_crit, p_value=p_value )

}