## Regress toenail arsenic on well-water arsenic with a line through the
## origin, inspect the fit and its residuals, then refit without the most
## extreme sample to see how much that one point moves the slope.
##
## get_arsenic_data() is expected to come from the sourced helper file and to
## return a data frame with the columns Arsenic_in_water_ppm and
## Arsenic_in_nail_ppm (assumption: confirm against the helper).
source("../../Data/get_arsenic_data.R")

DF <- get_arsenic_data()

## "0 +" removes the intercept, forcing the fitted line through the origin.
m2 <- lm(Arsenic_in_nail_ppm ~ 0 + Arsenic_in_water_ppm, data = DF)
print(summary(m2))

par(mfrow = c(2, 1))

## Plot the data and a linear fit:
##
plot(DF$Arsenic_in_water_ppm, DF$Arsenic_in_nail_ppm,
     type = "p", pch = 19,
     xlab = "Arsenic in water (ppm)", ylab = "Arsenic in toenail (ppm)")
grid()
abline(m2, col = "green")

## Plot the residuals:
##
plot(fitted(m2), residuals(m2),
     type = "p", pch = 19,
     xlab = "fitted values", ylab = "residuals")
abline(h = 0, lty = 1, col = "black")
grid()

par(mfrow = c(1, 1))

DF$residuals <- residuals(m2)

rows <- rownames(DF)

## Row with the largest toenail arsenic measurement.
idx_max_nail <- which.max(DF$Arsenic_in_nail_ppm)
print(sprintf("index= %s has maximum arsenic exposure of= %f",
              rows[idx_max_nail], DF$Arsenic_in_nail_ppm[idx_max_nail]))

## Row with the largest residual in absolute value. The full column name is
## spelled out so the lookup does not depend on `$` partial matching.
idx_max_resid <- which.max(abs(DF$residuals))
print(sprintf("index= %s has maximum abs residual of= %f",
              rows[idx_max_resid], DF$residuals[idx_max_resid]))

## Try to refit dropping the sample with the largest arsenic value:
##
## The row to drop is the one located above rather than a hard-coded row
## number, so the refit stays correct if the data are reordered or extended.
print(coefficients(m2))
m3 <- lm(Arsenic_in_nail_ppm ~ 0 + Arsenic_in_water_ppm,
         data = DF[-idx_max_nail, ])
print(coefficients(m3))