save_plots = FALSE rushing = read.table('../../Data/steelers_rushing.txt', header=TRUE, skip=2) rushing$prob_win = rushing$Won / ( rushing$Won + rushing$Lost ) print(head(rushing)) resp = cbind(rushing$Won, rushing$Lost) ## Does rushing and yards achieved affect the win/loss record: ## logit_model = glm(resp ~ rushing$Rushing_attempts + rushing$Rushing_yards, family=binomial()) print(summary(logit_model)) ## Does the coach matter: ## logit_model = glm(resp ~ rushing$Rushing_attempts + rushing$Coach, family=binomial()) print(summary(logit_model)) ## Using the best model so far (using Rushing_attemps) lets look for outliers: ## logit_model = glm(resp ~ rushing$Rushing_attempts, family=binomial()) print(summary(logit_model)) ## Look for influential outliers using a variety of plots: ## inf_lm = influence(logit_model) plot(inf_lm$pear.res, pch=19, ylab='Pearson residuals') grid() plot(inf_lm$dev.res, pch=19, ylab='Deviance residuals') grid() indx = c(12, 17) ## these look influential from the residual plots print(rushing[indx, ]) hat = inf_lm$hat p_hat = fitted(logit_model) plot(p_hat, hat, xlab='Fitted p', ylab='Hat matrix diagona1') grid() ## cbar: ## cbar = inf_lm$pear.res^2 * ( hat / (1 - hat ) ) if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/stats_in_sport_cbar_plot.eps", onefile=FALSE, horizontal=FALSE) } plot(cbar, pch=19, xlab='observation number', ylab='c-bar influence') lines(indx, cbar[indx], type='p', pch=19, col='red') grid() if( save_plots ){ dev.off() } ## difchisq and difdev: ## difchisq = cbar / hat difdev = inf_lm$dev.res^2 + cbar if( save_plots ){ postscript('../../WriteUp/Graphics/Chapter9/stats_in_sport_difchisq_plot.eps', onefile=FALSE, horizontal=FALSE) } plot(difchisq, pch=19, xlab='observation number', ylab='difference in chi-square') lines(indx, difchisq[indx], type='p', pch=19, col='red') grid() if( save_plots ){ dev.off() } #plot(difdev, pch=19, xlab='observation number', ylab='difference in deviance') db = dfbeta(logit_model) plot(db[, 1], xlab='index', ylab='change in intercept') plot(db[, 2], xlab='index', ylab='change in beta_1') grid()