save_figs = FALSE if(!require('MASS')){install.packages('MASS')} library(MASS) ##if(!require('car')){install.packages('car')} ##library(car) source('utils.R') source('../../Data/data_loaders.R') DF = load_appendix_dilemma_data() m = dim(DF)[2]-1 ## number of predictors N = dim(DF)[1] ## number of samples ## Drop the 27th sample: ## DF = DF[-27,] N = dim(DF)[1] if( FALSE ){ pairs(DF) } ## Part (a): ## regr = lm(Y ~ ., data=DF[-c(24), ]); N = dim(DF[-c(24), ])[1] print('ANOVA Table:') print(anova_alt(regr)) print('LM summary:') print(summary(regr)) ## Outliers are points where the 'residuals' are large: ## print('Different types of residuals:') df = data.frame(residuals=residuals(regr), rstandard=rstandard(regr), studres=studres(regr)) df_sorted = df[order(df$residuals), ] print(df_sorted) ##plot(regr) print('Adding the hat diagonals and resorting:') ; df$hii = lm.influence(regr)$hat df_sorted = df[order(df$hii), ] print(df_sorted) print('hii threshold=') print(2*(m+1)/N) print('Adding Cooks distance D_i and resorting:') df$Di = cooks.distance(regr) df_sorted = df[order(df$Di), ] print(df_sorted) df_sorted = df[order(df$residuals), ] print(df_sorted) ## Part (b): ## CXY = cor(DF[-c(24, 17), -c(4)]) ## <- drop the response print(CXY) ES = eigen(CXY) print('Eigenvalues:') print(round(ES$values, 3)) print('Eigenvectors:') print(round(ES$vectors, 3)) ## Fit the requested model: ## regr = lm(Y ~ ., data=DF[-c(24, 17), ]); N = dim(DF[-c(24, 17), ])[1] if( FALSE ){ library(car) ## <- to get vif print('VIF=') print(vif(regr)) } ## Part (c): ## ## Build the model without X3: ## regr = lm(Y ~ X1 + X2, data=DF) N = dim(DF)[1] m = 2 # number of predictors print('LM summary:') print(summary(regr)) ## Outliers are points where the 'residuals' are large: ## print('Residuals of the model Y ~ X1 + X2:') df = data.frame(residuals=residuals(regr), rstandard=rstandard(regr), studres=studres(regr)) df_sorted = df[order(df$residuals), ] print(df_sorted) ##plot(regr) print('Adding the hat diagonals and resorting:') ; df$hii = lm.influence(regr)$hat df_sorted = df[order(df$hii), ] print(df_sorted) print('hii threshold=') print(2*(m+1)/N) print('Adding Cooks distance D_i and resorting:') df$Di = cooks.distance(regr) df_sorted = df[order(df$Di), ] print(df_sorted)