#
# Written by:
# --
# John L. Weatherwax 2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
# xpdf ../../EBook/An_Introduction_to_Statistical_Learning.pdf 385 -z 200 &
#
#-----

save_plots = F

if( !require("ISLR") ){ install.packages("ISLR") }
if( !require("MASS") ){ install.packages("MASS") }
if( !require("e1071") ){ install.packages("e1071") }

set.seed(0)

vt100ClearScreen <- function(...) cat("\033[2J")

vt100ClearScreen()

#
# Part (a): Split the OJ data into a training set of 800 observations and a test set:
#
n = dim(OJ)[1]
n_train = 800

train_inds = sample(1:n, n_train)
test_inds = (1:n)[-train_inds]
n_test = length(test_inds)

#
# Part (b): Use a linear kernel to start with:
#
svm.fit = svm( Purchase ~ ., data=OJ[train_inds,], kernel="linear", cost=0.01 )
print( summary( svm.fit ) )

#
# Part (c): Use this specific SVM to estimate training/testing error rates:
#
y_hat = predict( svm.fit, newdata=OJ[train_inds,] )
print( table( predicted=y_hat, truth=OJ[train_inds,]$Purchase ) )
print( sprintf('Linear SVM training error rate (cost=0.01)= %10.6f', 1 - sum( y_hat == OJ[train_inds,]$Purchase ) / n_train ) )

y_hat = predict( svm.fit, newdata=OJ[test_inds,] )
print( table( predicted=y_hat, truth=OJ[test_inds,]$Purchase ) )
print( sprintf('Linear SVM testing error rate (cost=0.01)= %10.6f', 1 - sum( y_hat == OJ[test_inds,]$Purchase ) / n_test ) )

#
# Part (d): Use "tune" to select an optimal value for cost when we have a linear kernel:
#
tune.out = tune( svm, Purchase ~ ., data=OJ[train_inds,], kernel="linear",
                 ranges=list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000) ) )
print( summary(tune.out) ) # <- use this output to select the optimal cost value

#
# Part (e): Predict the performance on training and testing using the best linear model:
#
y_hat = predict( tune.out$best.model, newdata=OJ[train_inds,] )
print( table( predicted=y_hat, truth=OJ[train_inds,]$Purchase ) )
print( sprintf('Linear SVM training error rate (optimal cost=%.2f)= %10.6f', tune.out$best.model$cost, 1 - sum( y_hat == OJ[train_inds,]$Purchase ) / n_train ) )

y_hat = predict( tune.out$best.model, newdata=OJ[test_inds,] )
print( table( predicted=y_hat, truth=OJ[test_inds,]$Purchase ) )
print( sprintf('Linear SVM testing error rate (optimal cost=%.2f)= %10.6f', tune.out$best.model$cost, 1 - sum( y_hat == OJ[test_inds,]$Purchase ) / n_test ) )

#
# Part (f): Use a radial kernel:
#
tune.out = tune( svm, Purchase ~ ., data=OJ[train_inds,], kernel="radial",
                 ranges=list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000), gamma=c(0.5, 1, 2, 3, 4) ) )
print( summary(tune.out) ) # <- use this output to select the optimal cost and gamma values

y_hat = predict( tune.out$best.model, newdata=OJ[train_inds,] )
print( table( predicted=y_hat, truth=OJ[train_inds,]$Purchase ) )
print( sprintf('Radial SVM training error rate (optimal)= %10.6f', 1 - sum( y_hat == OJ[train_inds,]$Purchase ) / n_train ) )

y_hat = predict( tune.out$best.model, newdata=OJ[test_inds,] )
print( table( predicted=y_hat, truth=OJ[test_inds,]$Purchase ) )
print( sprintf('Radial SVM testing error rate (optimal)= %10.6f', 1 - sum( y_hat == OJ[test_inds,]$Purchase ) / n_test ) )

#
# Part (g): Use a polynomial kernel:
#
tune.out = tune( svm, Purchase ~ ., data=OJ[train_inds,], kernel="polynomial",
                 ranges=list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000), degree=c(1, 2, 3) ) )
print( summary(tune.out) ) # <- use this output to select the optimal cost and degree values

y_hat = predict( tune.out$best.model, newdata=OJ[train_inds,] )
print( table( predicted=y_hat, truth=OJ[train_inds,]$Purchase ) )
print( sprintf('Polynomial SVM training error rate (optimal)= %10.6f', 1 - sum( y_hat == OJ[train_inds,]$Purchase ) / n_train ) )

y_hat = predict( tune.out$best.model, newdata=OJ[test_inds,] )
print( table( predicted=y_hat, truth=OJ[test_inds,]$Purchase ) )
print( sprintf('Polynomial SVM testing error rate (optimal)= %10.6f', 1 - sum( y_hat == OJ[test_inds,]$Purchase ) / n_test ) )
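#
# Not part of the original exercise: a minimal sketch of how the repeated
# train/test error computation above could be wrapped in a helper, and how the
# cross-validation results can be read straight off the tune object.  The
# function name class_err is this sketch's own choice; best.parameters and
# best.performance are standard components of e1071's tune() return value.
#
class_err = function( fit, data, truth ){
  # Fraction of observations in 'data' misclassified by 'fit':
  mean( predict( fit, newdata=data ) != truth )
}

print( tune.out$best.parameters )   # best (cost, degree) combination found by cross-validation
print( tune.out$best.performance )  # the corresponding cross-validation error estimate
print( sprintf('Polynomial SVM testing error rate (via helper)= %10.6f',
               class_err( tune.out$best.model, OJ[test_inds,], OJ[test_inds,]$Purchase )) )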