#
# Written by:
# -- 
# John L. Weatherwax                2009-04-21
# 
# email: wax@alum.mit.edu
# 
# Please send comments and especially bug reports to the
# above email address.
#
# EPage 384
#
#-----

save_plots = F

if( ! require("MASS") ){ install.packages("MASS") }
if( ! require("e1071") ){ install.packages("e1071") }

set.seed(0)

vt100ClearScreen <- function(...) cat("\033[2J") 
vt100ClearScreen()

# Part (a):
#
n = 5000
p = 2 

x1 = runif(n) - 0.5
x2 = runif(n) - 0.5
y = 1*( x1^2 - x2^2 > 0 )
#y = 1*( x1^2 + x2^2 > 1 )
#y = 1*( abs(x1) - abs(x2) > 0 )
DF = data.frame( x1=x1, x2=x2, y=as.factor(y) )

# Part (b):
#
if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_5_original_data.eps", onefile=FALSE, horizontal=FALSE) }
plot( x1, x2, col=(y+1), pch=19, cex=1.05, xlab='x1', ylab='x2', main='initial data' )
grid()
if( save_plots ){ dev.off() }

# Part (c):  
#
m = glm( y ~ x1 + x2, data=DF, family=binomial )

# Part (d): Predict the class label of the training data.
#
# When type="response" in predict we output probabilities:
# 
y_hat = predict( m, newdata=data.frame(x1=x1,x2=x2), type="response" )
predicted_class = 1 * ( y_hat > 0.5 ) 
print( sprintf('Linear logistic regression training error rate= %10.6f',  1 - sum( predicted_class == y )/length(y) ) )

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_5_linear_LR_predicted_class.eps", onefile=FALSE, horizontal=FALSE) }
plot( x1, x2, col=(predicted_class+1), pch=19, cex=1.05, xlab='x1', ylab='x2', main='logistic regression: y ~ x1 + x2' )
grid()
if( save_plots ){ dev.off() }

# Part (e-f): Using logistic regression to fit a nonlinear model: 
#
m = glm( y ~ x1 + x2 + I(x1^2) + I(x2^2) + I(x1*x2), data=DF, family="binomial" )
y_hat = predict( m, newdata=data.frame(x1=x1,x2=x2), type="response" )
predicted_class = 1 * ( y_hat > 0.5 ) 
print( sprintf('Non-linear logistic regression training error rate= %10.6f',  1 - sum( predicted_class == y )/length(y) ) )

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_5_nonlinear_LR_predicted_class.eps", onefile=FALSE, horizontal=FALSE) }
plot( x1, x2, col=(predicted_class+1), pch=19, cex=1.05, xlab='x1', ylab='x2' )
grid()
if( save_plots ){ dev.off() }

# Part (g): Fit a linear SVM to the data and report the error rate: 
#
dat = data.frame( x1=x1, x2=x2, y=as.factor(y) )

# Do CV to select the value of cost using the "tune" function:
#
tune.out = tune( svm, y ~ ., data=dat, kernel="linear", ranges=list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000) ) )
print( summary(tune.out) ) # <- use this output to select the optimal cost value

# The best model is stored in "tune.out$best.model" 
# 
print(tune.out$best.model)

y_hat = predict( tune.out$best.model, newdata=data.frame(x1=x1,x2=x2) )
y_hat = as.numeric( as.character( y_hat ) ) # convert factor responses into numerical values 
print( sprintf('Linear SVM training error rate= %10.6f',  1 - sum( y_hat == y )/length(y) ) )

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_5_linear_SVM_predicted_class.eps", onefile=FALSE, horizontal=FALSE) }
plot( x1, x2, col=(y_hat+1), pch=19, cex=1.05, xlab='x1', ylab='x2' )
grid()
if( save_plots ){ dev.off() }

# Part (h-i): Fit a linear SVM to the data and report the error rate: 
#

# Do CV to select the value of cost using the "tune" function:
#
tune.out = tune( svm, y ~ ., data=dat, kernel="radial", ranges = 
	         list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000),
                       gamma=c(0.5,1,2,3,4) ) )
print( summary(tune.out) ) # <- use this output to select the optimal cost value

# The best model is stored in "tune.out$best.model" 
# 
print(tune.out$best.model)

y_hat = predict( tune.out$best.model, newdata=data.frame(x1=x1,x2=x2) )
y_hat = as.numeric( as.character( y_hat ) ) # convert factor responses into numerical values 
print( sprintf('Nonlinear SVM training error rate= %10.6f',  1 - sum( y_hat == y )/length(y) ) )

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_5_nonlinear_SVM_predicted_class.eps", onefile=FALSE, horizontal=FALSE) }
plot( x1, x2, col=(y_hat+1), pch=19, cex=1.05, xlab='x1', ylab='x2' )
grid()
if( save_plots ){ dev.off() }