# Leverage (hat-value) analysis for a linear model of university endowments.
#
# Steps:
#   1. Load the data via get_university_endowment_data() (defined in the
#      sourced file).
#   2. Fit Endowment ~ Percent_Change + Number_Students and print the summary.
#   3. Plot the hat-matrix diagonal against the fitted values and label the
#      observations with the largest hat values.
#   4. Refit the model with one high-leverage observation removed.

source("../../Data/get_university_endowment_data.R")

DF <- get_university_endowment_data()
## DF$Endowment = DF$Endowment - mean(DF$Endowment) ## subtract the mean
## DF$Endowment = log(DF$Endowment)

## Plot the data:
##
# Scatterplot matrix of every column except the first.
pairs(DF[, -1])

m <- lm(Endowment ~ Percent_Change + Number_Students, data = DF)
print(summary(m))

## Plot the hat matrix diagonal against the fitted values:
##
inf <- influence(m)
plot(
  m$fitted.values, inf$hat,
  type = "p", pch = 19,
  xlab = "Fitted values", ylab = "Hat diagonals"
)

# Positions of the (up to) four largest hat values, largest first.
# order() returns distinct positions even when hat values tie, and head()
# copes with fewer than four observations; indexing is relative to the
# fitted observations, so it stays valid if lm() dropped incomplete rows.
ext <- head(order(inf$hat, decreasing = TRUE), 4)

# Label from the fitted values' own names so labels always line up with the
# plotted points (same as row.names(DF) when no rows were dropped).
text(
  m$fitted.values[ext], inf$hat[ext],
  labels = names(m$fitted.values)[ext],
  pos = 4
)
grid()

## Based on index=5 having the largest hat value we will drop this sample
## and refit assuming it is "incorrect":
##
# NOTE(review): the row is removed by position (5), which was chosen by
# inspecting the plot above; re-check it if the input data change.
m1 <- lm(Endowment ~ Percent_Change + Number_Students, data = DF[-5, ])
print(summary(m1))