# # Written by: # -- # John L. Weatherwax 2009-04-21 # # email: wax@alum.mit.edu # # Please send comments and especially bug reports to the # above email address. # #----- save_plots = F # Regression with ARMA Noise: # library(AER) data("USMacroG") MacroDiff = as.data.frame(apply(USMacroG,2,diff)) attach(MacroDiff) fit1 = arima( unemp, order=c(1,0,0), xreg=cbind(invest,government) ) fit1_resids = residuals( fit1 ) fit2 = lm( unemp ~ invest+government ) fit2_resids = residuals( fit2 ) print( sprintf("AIC(arima fit)= %10.3f; AIC(lm fit)= %10.3f", AIC(fit1), AIC(fit2)) ) if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter14/arima_vs_lm.eps", onefile=FALSE, horizontal=FALSE) } par(mfrow=c(1,2)) acf( fit1_resids, main="arima fit" ) acf( fit2_resids, main="lm fit" ) if( save_plots ){ dev.off() } # Compute the BIC for each model: n = length(unemp) # same for both models p = length(fit1$coef) - 1 # subtract one for the intercept fit1_bic = fit1$aic + (log(n) - 2)*p p = length(fit2$coef) - 1 # subtract one for the intercept fit2_bic = AIC(fit2) + (log(n) - 2)*p print( sprintf("BIC(arima fit)= %10.3f; BIC(lm fit)= %10.3f", fit1_bic, fit2_bic) ) # Lets try an AR(2) model: # fit3 = arima( unemp, order=c(2,0,0), xreg=cbind(invest,government) ) fit3_bic = fit3$aic + (log(n) - 2)*(length(fit3$coef) - 1) fit4 = arima( unemp, order=c(1,0,1), xreg=cbind(invest,government) ) fit4_bic = fit4$aic + (log(n) - 2)*(length(fit4$coef) - 1) print( sprintf("BIC(arima(2,0,0))= %10.3f; BIC(arima(1,0,1))= %10.3f; ", fit3_bic, fit4_bic) ) # Nonlinear Regression: # library(Ecdat) data(Irates) r1 = Irates[,1] n = length(r1) lag_r1 = lag(r1)[-n] delta_r1 = diff(r1) n = length(lag_r1) if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter14/nonlin_regression.eps", onefile=FALSE, horizontal=FALSE) } par(mfrow=c(3,2)) plot(r1,main="(a)") plot(delta_r1,main="(b)") plot(delta_r1^2,main="(c)") plot(lag_r1,delta_r1,main="(d)") plot(lag_r1,delta_r1^2,main="(e)") if( save_plots ){ dev.off() } nlmod_CKLS = nls( delta_r1 ~ a * (theta-lag_r1), start=list(theta=5, a=0.01), control=list(maxiter=200) ) param = summary(nlmod_CKLS)$parameters[,1] par(mfrow=c(2,2)) t = seq( from=1946, to=1991+2/12, length=n ) plot( lag_r1, ylim=c(0,16), ylab="rate and theta", main="(a)", type="l" ) abline( h=param[1], lwd=2, col="red" ) res_sq = residuals(nlmod_CKLS)^2 nlmod_CKLS_res = nls( res_sq ~ A * lag_r1^B, start=list(A=0.2,B=1/2) ) param2 = summary(nlmod_CKLS_res)$parameters[,1] plot( lag_r1, sqrt(res_sq), pch=5, ylim=c(0,6), main="(b)" ) attach( as.list(param2) ) curve( sqrt( A*x^B ), add=T, col="red", lwd=3 ) nlmod_CKLS_wt = nls( delta_r1 ~ a * (theta-lag_r1), start=list(theta=5,a=0.01), control=list(maxiter=200), weights=1/fitted(nlmod_CKLS_res) ) plot( lag_r1, ylim=c(0,16), ylab="rate and theta", main="(c)", type="l") param3 = summary(nlmod_CKLS_wt)$parameters[,1] abline(h=param3[1],lwd=2,col="red") # Response Transformation: # library(AER) data(HousePrices) fit1 = lm( price ~ ., data=HousePrices) summary(fit1) library(MASS) fit2 = boxcox( fit1, xlab=expression(alpha) ) mi = which.max( fit2$y ) ml_alpha = fit2$x[mi] library(car) fit3 = lm( bcPower(price,ml_alpha) ~ ., data=HousePrices) summary(fit3) AIC(fit1) AIC(fit3) print( sprintf("AIC(fit1)= %10.6f, AIC(fit3)= %10.6f", AIC(fit1), AIC(fit3)) ) # Untangle the bcPower function to get the actual housing prices: plot( ( ml_alpha * fitted.values(fit3) + 1 )^(1/ml_alpha), HousePrices$price ) # Is the variance stable: plot( fitted.values(fit3), residuals(fit3) ) # Are the residuals autocorrelated: acf( residuals(fit3) ) # Who Owns an Air Conditioner? # library(AER) data(HousePrices) fit1 = glm( aircon ~ ., family="binomial", data=HousePrices ) summary(fit1) library(MASS) fit2 = stepAIC(fit1) summary(fit2) # Lets remove bathrooms and look at the model we get: fit3 = glm( aircon ~ price + stories + gasheat, family = "binomial", data = HousePrices) summary(fit3) # The given sample looks like the first row in the insample training dataframe: # samp = HousePrices[1,] # get a dataframe with one row plogis( predict( fit1, newdata=samp ) )