Atkinsons = function( DF, y_name, X_names ){
  #
  # Performs Atkinson's score test for a power transformation of the response,
  # using the constructed variable U = y * ( log(y/G) - 1 ).
  #
  # y_name  = name of the response variable e.g. 'Y'
  # X_names = character vector of predictor variable names e.g. c( 'AGE', 'X' )
  #
  N = dim(DF)[1]

  # To avoid difficulties with the code below we will delete any columns
  # in DF not specified in y_name or X_names i.e. we are assuming the user
  # has fully specified the variables of interest in the arguments to this
  # function.
  #
  col_names = colnames(DF)
  for( cni in 1:length(col_names) ){
    cn = col_names[cni]
    if( ! ( (cn==y_name) | (cn %in% X_names) ) ){
      DF[, cn] = NULL
    }
  }

  y = DF[y_name] # keeps the dataframe structure
  X = DF[X_names]

  # Build a linear model using the original predictors:
  #
  original_formula = sprintf("%s ~ .", y_name)
  m0 = lm( original_formula, data=cbind( X, y ) )

  # Form the column vector u, where G is the geometric mean of the response:
  #
  stopifnot( all( y>0 ) ) # this method only works for a positive response
  G = prod( y^(1/N) )
  u = y * ( log(y/G) - 1 )
  colnames(u) = "U"

  # Augment the previous model by adding the variable U:
  #
  stopifnot( !( "U" %in% colnames(X) ) ) # to avoid name clashes in the next call we cannot have a predictor named U
  mU = update( m0, ". ~ . + U", data=cbind( X, y, u ) )

  # The estimate of gamma (the coefficient of U, which follows the intercept
  # and the length(X_names) original predictors):
  #
  gamma_hat = as.double(coefficients(mU)[length(X_names)+1+1])

  # The t-stat of the gamma coefficient (for significance):
  #
  smU = summary(mU)
  gamma_t = smU$coefficients[length(X_names)+1+1,3]

  res = list(gamma_hat=gamma_hat, gamma_t=gamma_t, lambda_hat=1-gamma_hat)
  res
}

Box_Tidwell = function( DF, y_name, X_names ){
  #
  # This code will perform power transformations on the X variables looking for significant ones.
  # The code uses the Box-Tidwell transformation discussed in the book in Section 3.4.2
  #
  N = dim(DF)[1]

  # To avoid difficulties with the code below we will delete any columns
  # in DF not specified in y_name or X_names i.e. we are assuming the user
  # has fully specified the variables of interest in the arguments to this
  # function.
  #
  col_names = colnames(DF)
  for( cni in 1:length(col_names) ){
    cn = col_names[cni]
    if( ! ( (cn==y_name) | (cn %in% X_names) ) ){
      DF[, cn] = NULL
    }
  }

  y = DF[y_name] # keeps the dataframe structure
  X = DF[X_names]

  stopifnot( !( "U" %in% colnames(X) ) ) # to avoid name clashes in what follows we cannot have a predictor named U

  # Build a linear model using all original predictors:
  #
  original_formula = sprintf("%s ~ .", y_name)
  m0 = lm( original_formula, data=cbind( X, y ) )

  # Study each predictor one at a time:
  #
  predictor_name = c()
  predictor_gamma = c()
  predictor_t = c()
  predictor_alpha = c() # stores the power of the transformation to apply

  for( ni in 1:length(X_names) ){
    stopifnot( all( X[, ni]>0 ) ) # this method only works for positive features

    nm = X_names[ni] # we will apply the transformation to this feature

    # The original estimate of beta_1 (the coefficient of this feature in m0):
    #
    beta_1_hat = as.double(coefficients(m0)[1+ni])

    # Put in the feature x log(x):
    #
    u = data.frame( U=( X[,ni] * log(X[,ni]) ) )
    mU = update( m0, ". ~ . + U", data=cbind( X, y, u ) )

    # The estimate of gamma (the coefficient of x log(x)):
    #
    gamma_hat = as.double(coefficients(mU)[length(X_names)+1+1])

    # The t-stat of the gamma coefficient (for significance):
    #
    smU = summary(mU)
    gamma_t = smU$coefficients[length(X_names)+1+1,3]

    predictor_name = c( predictor_name, nm )
    predictor_gamma = c( predictor_gamma, gamma_hat )
    predictor_t = c( predictor_t, gamma_t )
    predictor_alpha = c( predictor_alpha, gamma_hat / beta_1_hat + 1 ) # the one-step Box-Tidwell estimate of the power alpha
  }

  res = data.frame(variable_name=predictor_name, gamma_hat=predictor_gamma, t_value=predictor_t, alpha=predictor_alpha)
  res
}
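
# A minimal usage sketch for Atkinsons() (added for illustration; the
# simulated data frame DF and the names Y, X1, X2 below are hypothetical,
# not from the text). The response is generated so that log(Y) is linear in
# the predictors, so the suggested power lambda_hat = 1 - gamma_hat should
# come out well below 1 (near 0, i.e. a log transformation) with a large
# |gamma_t|.
#
set.seed(0) # hypothetical seed, for reproducibility only
n  = 100
x1 = runif( n, min=1, max=10 )
x2 = runif( n, min=1, max=10 )
DF = data.frame( Y=exp( 0.3*x1 + 0.1*x2 + rnorm(n, sd=0.1) ), X1=x1, X2=x2 )

atk = Atkinsons( DF, 'Y', c('X1', 'X2') )
print( atk$lambda_hat ) # should be near zero, suggesting log(Y)
print( atk$gamma_t )    # a large |t| value indicates a transformation is needed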
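
# A minimal usage sketch for Box_Tidwell() (again the simulated data and
# variable names are hypothetical). Here Y is linear in sqrt(X1) and in X2
# itself, so the one-step estimate alpha should be near 0.5 for X1 (with a
# significant t_value) and near 1 for X2 (with a small t_value).
#
set.seed(0)
n  = 200
x1 = runif( n, min=1, max=10 )
x2 = runif( n, min=1, max=10 )
DF = data.frame( Y=1 + 2*sqrt(x1) + 0.5*x2 + rnorm(n, sd=0.1), X1=x1, X2=x2 )

print( Box_Tidwell( DF, 'Y', c('X1', 'X2') ) )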