## Poisson regression of lottery winners.
##
## Loads the lottery data, fits a Poisson GLM of `winners` on log-population,
## `area` and `mill`, predicts the expected number of winners for two new
## observations, and ranks the observed towns by their model residual.

source('../../Data/get_lottery_data.R')

DF <- get_lottery_data()

DF$logPop <- log(DF$pop)  ## model the logarithm of the population
## NOTE(review): popDensity is not used by the model below; it is kept only
## as an extra column of DF.
DF$popDensity <- DF$pop / DF$area

m <- glm(winners ~ logPop + area + mill, family = poisson, data = DF)
print(summary(m))

## New data: the two observations we want predictions for.
## NOTE(review): pop is divided by 1e3 here, presumably because DF$pop is
## recorded in thousands -- confirm against get_lottery_data().
nd <- data.frame(
  pop = c(126000, 6000) / 1e3,
  area = c(21.1, 23.3),
  mill = c(37.0, 26.4)
)
nd$logPop <- log(nd$pop)  ## same transform that was applied to DF

## type = 'response' gives predictions on the scale of the response
## (expected counts) instead of the scale of the linear predictor.
p <- predict(m, newdata = nd, type = 'response')
print(p)

## How extreme are the two samples we need to predict:
##
print(range(DF$pop))
print(range(DF$mill))

## Which towns are luckier than others:
##
DF$mResidual <- residuals(m)  ## default residual type for a glm is deviance
DF <- DF[order(DF$mResidual), ]  ## order by the model residual
print(DF[, c('town', 'mResidual')])