#
# Written by:
# -- 
# John L. Weatherwax                2009-04-21
# 
# email: wax@alum.mit.edu
# 
# Please send comments and especially bug reports to the
# above email address.
# 
#-----

save_plots = FALSE

set.seed(0) 

# P 1: EPage 123
#
library("Ecdat")
#?CPSch3
data(CPSch3)
dimnames(CPSch3)[[2]]

male.earnings = CPSch3[CPSch3[,3]=="male",2]
sqrt.male.earnings = sqrt(male.earnings)
log.male.earnings = log(male.earnings)
one.fourth.male.earnings = male.earnings^(1/4)

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter5/chap_5_prob_1_qqnorm.eps", onefile=FALSE, horizontal=FALSE) }
par(mfrow=c(2,2))
qqnorm(male.earnings, datax=T, main="untransformed")
qqnorm(sqrt.male.earnings, datax=T, main="sqruare-root transformed")
qqnorm(log.male.earnings, datax=T, main="log-transformed")
qqnorm(one.fourth.male.earnings, datax=T, main="1/4 power")
if( save_plots ){ dev.off() }

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter5/chap_5_prob_1_boxplot.eps", onefile=FALSE, horizontal=FALSE) }
par(mfrow=c(2,2))
boxplot(male.earnings, main="untransformed")
boxplot(sqrt.male.earnings, main="sqruare-root transformed")
boxplot(log.male.earnings, main="log-transformed")
boxplot(one.fourth.male.earnings, main="1/4 power")
if( save_plots ){ dev.off() }

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter5/chap_5_prob_1_density_plot.eps", onefile=FALSE, horizontal=FALSE) }
par(mfrow=c(2,2))
plot(density(male.earnings), main="untransformed")
plot(density(sqrt.male.earnings), main="sqruare-root transformed")
plot(density(log.male.earnings), main="log-transformed")
plot(density(one.fourth.male.earnings), main="1/4 power")
if( save_plots ){ dev.off() }


# P 2: EPage 123
#
library(MASS)
if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter5/chap_5_prob_1_boxcox_plot.eps", onefile=FALSE, horizontal=FALSE) }
par(mfrow=c(1,1))
bc = boxcox( male.earnings~1 )
bc = boxcox( male.earnings~1, lambda=seq(0.3, 0.45, 1/100), interp=FALSE )
if( save_plots ){ dev.off() }

ind = (bc$y == max(bc$y))
ind2 = (bc$y > max(bc$y) - qchisq(0.95, df=1)/2)

mli = which.max( bc$y ) # what is the largest value of the likelihood
ml_lam = bc$x[mli] # extract the value of lambda that gives the largest likelihood
print(sprintf("Maxlikihood lambda value= %10.6f", ml_lam))
print(range(bc$x[ind2]))

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter5/chap_5_prob_1_boxcox_density_plot.eps", onefile=FALSE, horizontal=FALSE) }
library(car)
plot( density( bcPower( male.earnings, ml_lam ) ) ) # plot the transformed variables
if( save_plots ){ dev.off() }


# P 3: EPage 125
#
library("fGarch")
fit_sstd = sstdFit( male.earnings, hessian=T )


# P 4: EPage 125
#
if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter5/rlab_prob_4.eps", onefile=FALSE, horizontal=FALSE) }
plot( density( male.earnings ), type="l", xlab="x", ylab="density" )
x_values = seq( min(male.earnings), max(male.earnings), length.out=100 )
y_values = dsstd( x_values, mean=fit_sstd$estimate[1], sd=fit_sstd$estimate[2], nu=fit_sstd$estimate[3], xi=fit_sstd$estimate[4] )
lines(x_values,y_values,lwd=2,lty=5)
legend("topright",c("KDE","sstd"),lwd=2,lty=c(1,5))
if( save_plots ){ dev.off() }


# P 5: EPage 125
#
fit_sged = sgedFit( male.earnings ) 

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter5/rlab_prob_5.eps", onefile=FALSE, horizontal=FALSE) }
plot( density( male.earnings ), type="l", xlab="x", ylab="density" )
x_values = seq( min(male.earnings), max(male.earnings), length.out=100 )
y_values = dsged( x_values, mean=fit_sged$par[1], sd=fit_sged$par[2], nu=fit_sged$par[3], xi=fit_sged$par[4] )
lines(x_values,y_values,lwd=2,lty=5)
legend("topright",c("KDE","sged"),lwd=2,lty=c(1,5))
if( save_plots ){ dev.off() }

# which model is a better fit: 
c( fit_sstd$minimum, fit_sged$objective )


# P 6: EPage 125
#
data(Garch,package="Ecdat")
library("fGarch")
data(EuStockMarkets)
Y = diff(log(EuStockMarkets[,1])) # retuns on the DAX index 
#### fit a std density ####
loglik_std = function(x){
    f = -sum(log(dstd(Y, mean=x[1], sd=x[2], nu=x[3])))
    f
}
start = c(mean(Y), sd(Y), 4) # optimization starting point 
fit_std = optim( start, loglik_std, method="L-BFGS-B", lower=c(-0.1,0.001,2.1), upper=c(+0.1,1.0,20) )
print(c("MLE=", round(fit_std$par, digits=5)))
m_logL_std = fit_std$value # minus the log-likelihood
AIC_std = 2*m_logL_std + 2 * length(fit_std$par)


# P 7: EPage 125
#
#### fit a sstd density ####
loglik_sstd = function(x){
    f = -sum(log(dsstd(Y, mean=x[1], sd=x[2], nu=x[3], xi=x[4])))
    f
}
start = c(mean(Y), sd(Y), 4, 1.5) # optimization starting point 
fit_sstd = optim( start, loglik_sstd, method="L-BFGS-B", lower=c(-0.1,0.001,2.1,0.5), upper=c(+0.1,1.0,20.0,3.0) )
print(c("MLE=", round(fit_sstd$par, digits=5)))
m_logL_sstd = fit_sstd$value # minus the log-likelihood
AIC_sstd = 2*m_logL_sstd + 2 * length(fit_sstd$par)

print( c(AIC_sstd, AIC_sstd) )


# P 8: EPage 125
#
# Use the central (not skewed) t-distribution from a previous problem i.e. the parameters in fit_std 
#
x1 = pstd( Y, mean=fit_std$par[1], sd=fit_std$par[2], nu=fit_std$par[3] )
x = qnorm(x1)

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter5/rlab_prob_8_KDE_N_TKDE.eps", onefile=FALSE, horizontal=FALSE) }
d1 = density(Y)
plot(d1$x,d1$y,type="l",xlab="y",ylab="Density(y)")
d2 = density(x)
ginvx = qstd(pnorm(d2$x), mean = fit_std$par[1], sd = fit_std$par[2], nu = fit_std$par[3])
gprime_num = dstd(ginvx,mean = fit_std$par[1], sd = fit_std$par[2], nu = fit_std$par[3])
gprime_den = dnorm(qnorm(pstd(ginvx,mean = fit_std$par[1], sd = fit_std$par[2], nu = fit_std$par[3])))
gprime = gprime_num/gprime_den
lines(ginvx,d2$y*gprime,type="l",lty=2)
legend("topleft",c("KDE","TKDE"),lty=c(1,2),lwd=2,cex=1.5)
if( save_plots ){ dev.off() }

# Zoom in to a specific region: 
if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter5/rlab_prob_8_KDE_N_TKDE_zoom.eps", onefile=FALSE, horizontal=FALSE) }
d1 = density(Y)
plot(d1$x,d1$y,type="l",xlab="y",ylab="Density(y)",xlim=c(0.035,0.06),ylim=c(0,5))

d2 = density(x)
ginvx = qstd(pnorm(d2$x), mean = fit_std$par[1], sd = fit_std$par[2], nu = fit_std$par[3])
gprime_num = dstd(ginvx,mean = fit_std$par[1], sd = fit_std$par[2], nu = fit_std$par[3])
gprime_den = dnorm(qnorm(pstd(ginvx,mean = fit_std$par[1], sd = fit_std$par[2], nu = fit_std$par[3])))
gprime = gprime_num/gprime_den
lines(ginvx,d2$y*gprime,type="l",lty=2)

x_values = seq( min(Y), max(Y), length.out=100 )
lines( x_values, dstd( x_values, mean = fit_std$par[1], sd = fit_std$par[2], nu = fit_std$par[3]), type="l", lty=3 )

legend("topleft",c("KDE","TKDE","Parametric std"),lty=c(1,2),lwd=2,cex=1.5)
if( save_plots ){ dev.off() }


# P 10 (follow the code from P 7):
#
Y = diff(log(EuStockMarkets[, 4])) # returns on the FTSE index
start = c(mean(Y), sd(Y), 4, 1.5) # optimization starting point 
fit_sstd = optim( start, loglik_sstd, method="L-BFGS-B", lower=c(-0.1,0.001,2.1,0.5), upper=c(+0.1,1.0,20.0,3.0), hessian=TRUE )
m_logL_sstd = fit_sstd$value # minus the log-likelihood
AIC_sstd = 2*m_logL_sstd + 2 * length(fit_sstd$par)
print(c("MLE=", round(fit_sstd$par, digits=5)))
CovMLE = solve(fit_sstd$hessian)
se = sqrt(diag(CovMLE)) # standard errors
print(c("MLE Std Err=", round(se, digits=5)))
print(sprintf('AIC= %f', AIC_sstd))

# P 11:
#
data = read.csv('../../BookCode/Data/MCD_PriceDaily.csv')
adjPrice = data[, 7]
LogRet = diff(log(adjPrice))
library(MASS)
library(fGarch)
fit.T = fitdistr(LogRet, 't')
params.T = fit.T$estimate
mean.T = params.T[1]
nu.T = params.T[3]
sd.T = params.T[2] * sqrt( nu.T / (nu.T - 2) )
x = seq(-0.04, 0.04, by=0.0001)
hist(LogRet, 80, freq=FALSE)
lines(x, dstd(x, mean=mean.T, sd=sd.T, nu=nu.T), lwd=2, lty=2, col='red')


# P 12:
# 
print( fit.T$estimate[1] + 2*fit.T$sd[1]*c(-1, +1) ) 


# P 13:
# 
print( fit.T$estimate[3] + 2*fit.T$sd[3]*c(-1, +1) )