#
# Written by:
# --
# John L. Weatherwax    2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
#-----

save_plots = FALSE

#
# Nonlinear Regression:
#
library(Ecdat)

data(Irates)
r1 = Irates[,1]
n = length(r1)
# stats::lag only shifts the time base of the ts object, so dropping the last
# element gives r_t for t = 1, ..., n-1, aligned with delta_r1 = r_{t+1} - r_t:
lag_r1 = lag(r1)[-n]
delta_r1 = diff(r1)
n = length(lag_r1)

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter11/nonlin_regression.eps", onefile=FALSE, horizontal=FALSE) }
par(mfrow=c(3,2))
plot(r1, main="(a)")
plot(delta_r1, main="(b)")
plot(delta_r1^2, main="(c)")
plot(lag_r1, delta_r1, main="(d)")
plot(lag_r1, delta_r1^2, main="(e)")
if( save_plots ){ dev.off() }

# Fit the CKLS drift: delta_r1 = a * (theta - lag_r1) + noise
nlmod_CKLS = nls( delta_r1 ~ a * (theta - lag_r1),
                  start=list(theta=5, a=0.01), control=list(maxiter=200) )
param = summary(nlmod_CKLS)$parameters[,1]

par(mfrow=c(2,2))
t = seq( from=1946, to=1991+2/12, length=n ) # time axis (not used by the index plots below)
plot( lag_r1, ylim=c(0,16), ylab="rate and theta", main="(a)", type="l" )
abline( h=param[1], lwd=2, col="red" )

# Model the squared residuals as A * lag_r1^B to estimate the variance function:
res_sq = residuals(nlmod_CKLS)^2
nlmod_CKLS_res = nls( res_sq ~ A * lag_r1^B, start=list(A=0.2, B=1/2) )
param2 = summary(nlmod_CKLS_res)$parameters[,1]
plot( lag_r1, sqrt(res_sq), pch=5, ylim=c(0,6), main="(b)" )
A = param2[["A"]]; B = param2[["B"]] # pull out the estimates instead of attach()-ing them
curve( sqrt( A*x^B ), add=TRUE, col="red", lwd=3 )

param2_std_err = summary(nlmod_CKLS_res)$parameters[,2]

# The confidence interval for sigma (A = sigma^2):
#
param2_A_ci = param2[1] + param2_std_err[1] * qnorm(1-0.05/2) * c(-1, +1)
param2_A_ci[1] = max(param2_A_ci[1], 0.0) # can't have a negative variance
sigma_ci = sqrt(param2_A_ci)
print('sigma ci:')
print(sigma_ci)

# The confidence interval for gamma (B = 2*gamma):
#
param2_B_ci = param2[2] + param2_std_err[2] * qnorm(1-0.05/2) * c(-1, +1)
gamma_ci = param2_B_ci/2
print('gamma ci:')
print(gamma_ci)

# Refit the drift with weights inversely proportional to the fitted variance:
nlmod_CKLS_wt = nls( delta_r1 ~ a * (theta - lag_r1),
                     start=list(theta=5, a=0.01), control=list(maxiter=200),
                     weights=1/fitted(nlmod_CKLS_res) )
plot( lag_r1, ylim=c(0,16), ylab="rate and theta", main="(c)", type="l" )
param3 = summary(nlmod_CKLS_wt)$parameters[,1]
abline( h=param3[1], lwd=2, col="red" )

#
# Response Transformation:
#
library(AER)

data(HousePrices)

fit1 = lm( price ~ ., data=HousePrices )
summary(fit1)

# Find the maximum-likelihood Box-Cox exponent:
library(MASS)
fit2 = boxcox( fit1, xlab=expression(alpha) )
mi = which.max( fit2$y )
ml_alpha = fit2$x[mi]

# Refit the regression with the transformed response:
library(car)
fit3 = lm( bcPower(price, ml_alpha) ~ ., data=HousePrices )
summary(fit3)

AIC(fit1)
AIC(fit3)
print( sprintf("AIC(fit1)= %10.6f, AIC(fit3)= %10.6f", AIC(fit1), AIC(fit3)) )

# Invert the bcPower transform to compare fitted and actual housing prices:
plot( ( ml_alpha * fitted.values(fit3) + 1 )^(1/ml_alpha), HousePrices$price )

# Is the variance stable?
plot( fitted.values(fit3), residuals(fit3) )

# Are the residuals autocorrelated?
acf( residuals(fit3) )

#
# Who Owns an Air Conditioner?
#
library(AER)

data(HousePrices)

fit1 = glm( aircon ~ ., family="binomial", data=HousePrices )
summary(fit1)

# Use stepwise AIC to reduce the full model:
library(MASS)
fit2 = stepAIC(fit1)
summary(fit2)

# Let's remove bathrooms and look at the model we get:
fit3 = glm( aircon ~ price + stories + gasheat, family="binomial", data=HousePrices )
summary(fit3)

# The given sample matches the first row of the in-sample training data frame:
#
samp = HousePrices[1,] # a data frame with one row
plogis( predict( fit1, newdata=samp ) ) # inverse logit of the linear predictor
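
#
# Added cross-check (not part of the original script): profile-likelihood intervals
# for A and B from the residual-variance fit.  confint() on an nls object profiles
# the sum-of-squares surface (the method comes from MASS, which is loaded above),
# so these intervals need not be symmetric like the Wald intervals computed by hand.
# This is a sketch: it assumes nlmod_CKLS_res still exists in the workspace and that
# the profiling converges; if it does not, fall back to the Wald intervals above.
#
ci_AB = confint( nlmod_CKLS_res, level=0.95 )
print( sqrt( pmax( ci_AB["A",], 0 ) ) ) # implied interval for sigma = sqrt(A)
print( ci_AB["B",]/2 )                  # implied interval for gamma = B/2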
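
#
# Added sanity check (not part of the original script): the manual inverse used in the
# response-transformation section, (ml_alpha * z + 1)^(1/ml_alpha), should undo
# car::bcPower exactly when applied to the transformed prices themselves.  This
# assumes ml_alpha is not exactly zero (bcPower then reduces to log and the inverse
# would be exp instead):
#
z = bcPower( HousePrices$price, ml_alpha )
print( all.equal( ( ml_alpha * z + 1 )^(1/ml_alpha), as.numeric(HousePrices$price) ) )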
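
#
# Added note (not part of the original script): predict() with type="response"
# applies the inverse logit internally, so for the same single-row sample it should
# agree with plogis(predict(...)) above; the reduced model fit3 is shown alongside
# the full model for comparison.
#
predict( fit1, newdata=samp, type="response" )
predict( fit3, newdata=samp, type="response" )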