source('../../Data/data_loaders.R')

# Part (a):
#
# Do the computation "by hand":
# EPage 123
#
# Solve the normal equations (X'X) b = X'y for the standardized regression
# coefficients, using the tabulated correlation values below.
#
# Order of variables is: CLOT PROG ENZ LIV
#
predictor_names <- c("CLOT", "PROG", "ENZ", "LIV")

# X'X for the standardized predictors. The matrix is symmetric, so the
# column-major fill (byrow = FALSE) gives the same result as a row-major one.
data <- c(
  1,     0.09,   -0.15,  0.502,
  0.09,  1,      -0.024, 0.369,
  -0.15, -0.024, 1,      0.416,
  0.502, 0.369,  0.416,  1
)
XTX <- matrix(data = data, nrow = 4, ncol = 4, byrow = FALSE)

# Take row and column labels from one vector so they cannot disagree
# (the column labels previously read "FROG" instead of "PROG").
dimnames(XTX) <- list(predictor_names, predictor_names)

# These are the correlations between the centered response ( y - y bar J )
# and the standardized features, in the same variable order as XTX.
XTy <- c(0.373, 0.554, 0.58, 0.722)

by_hand <- solve(XTX, XTy)

print(XTX)
print(XTy)

# Check our results using the function lm:
#
DF <- load_appendix_survival_data()

# Center every column by its mean and scale it by its standard deviation.
# NOTE(review): assumes every column of DF is numeric -- confirm against
# load_appendix_survival_data().
cm <- colMeans(DF)
cs <- apply(DF, 2, sd)
DF_standardized <- sweep(sweep(DF, 2, cm, FUN = '-'), 2, cs, FUN = '/')

# Here all the variables are standardized (even TIME):
# Note that we don't strictly need to include the "- 1" in the lm formula
# below, but if we don't we will get an "Intercept" term which is effectively
# zero.
#
ms <- lm(TIME ~ CLOT + PROG + ENZ + LIV - 1, data = DF_standardized)
by_lm <- coefficients(ms)

# Show the two coefficient vectors one above the other for comparison.
print(rbind(by_hand, by_lm))