#
# Written by:
# --
# John L. Weatherwax 2005-08-04
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
# Epage 169
#
#-----

library(MASS)
library(nnet)

varValue = 0.1 # this gives data that is more "mixed" than originally suggested ...
Sigma = matrix( c(varValue, 0.0, 0.0, varValue), 2, 2 )

# Generate the data suggested (basically the XOR data set):
#
nPerClust = 200
X1Pt1 = mvrnorm( n=nPerClust, c(0,0), Sigma )
X1Pt2 = mvrnorm( n=nPerClust, c(1,1), Sigma )
X2Pt1 = mvrnorm( n=nPerClust, c(0,1), Sigma )
X2Pt2 = mvrnorm( n=nPerClust, c(1,0), Sigma )

X1 = rbind( X1Pt1, X1Pt2 )
nX1 = dim(X1)[1]
X2 = rbind( X2Pt1, X2Pt2 )
nX2 = dim(X2)[1]

# Plot the training data (set the axis limits from both classes so no
# points are silently clipped):
#
if( T ){
  #postscript("../../WriteUp/Graphics/Chapter4/prob_2_initial_data.eps", onefile=FALSE, horizontal=FALSE)
  matplot( X1[,1], X1[,2], xlab='x_1', ylab='x_2', pch=19, col="red",
           xlim=range( X1[,1], X2[,1] ), ylim=range( X1[,2], X2[,2] ) )
  matplot( X2[,1], X2[,2], xlab='x_1', ylab='x_2', pch=18, col="blue", add=T )
  #dev.off()
}

# Form the full set of training data by stacking the two classes.  Note that we
# use rbind (not merge): merge() joins on common columns and sorts the result,
# which would break the row correspondence with the labels in Y below.
#
X = rbind( X1, X2 )
Y = class.ind( c( rep( 1, nX1 ), rep( 0, nX2 ) ) ) # class #1 gets a 1 while class #2 gets a 0

# Generate NTest new vectors and classify them:
#
nPerClust = 25
X1Pt1 = mvrnorm( n=nPerClust, c(0,0), Sigma )
X1Pt2 = mvrnorm( n=nPerClust, c(1,1), Sigma )
X2Pt1 = mvrnorm( n=nPerClust, c(0,1), Sigma )
X2Pt2 = mvrnorm( n=nPerClust, c(1,0), Sigma )

X1Test = rbind( X1Pt1, X1Pt2 )
nX1Test = dim(X1Test)[1]
X2Test = rbind( X2Pt1, X2Pt2 )
nX2Test = dim(X2Test)[1]

# Form the full set of testing data in the same way (rbind again, to keep the
# rows aligned with the labels in YTest):
#
XTest = rbind( X1Test, X2Test )
YTest = class.ind( c( rep( 1, nX1Test ), rep( 0, nX2Test ) ) ) # class #1 gets a 1 while class #2 gets a 0

# Plot the testing data on top of the training data:
#
postscript("../../WriteUp/Graphics/Chapter4/prob_2_initial_data.eps", onefile=FALSE, horizontal=FALSE)
matplot( X1[,1], X1[,2], xlab='x_1', ylab='x_2', pch=19, col="red",
         xlim=range( X1[,1], X2[,1], XTest[,1] ), ylim=range( X1[,2], X2[,2], XTest[,2] ) )
matplot( X2[,1], X2[,2], xlab='x_1', ylab='x_2', pch=18, col="blue", add=T )
matplot( XTest[,1], XTest[,2], pch=17, col="black", add=T )
dev.off()

# Train an initial neural network on this data just to get an idea of how to use
# this command.  With 2 inputs, 2 hidden units, and 2 outputs there are
# (2+1)*2 + (2+1)*2 = 12 weights, hence the length of Wts:
#
xorNet = nnet( X, Y, size=2, Wts=rep( 0.1, 12 ), trace=FALSE ) # the example calling sequence

# A function to help with classification accuracy (returns the error rate):
#
test.cl <- function(true, pred){
  true <- max.col(true)
  cres <- max.col(pred)
  return( sum( true != cres ) / length(true) )
}

xorNetPredictions = predict( xorNet, X )
er = test.cl( Y, xorNetPredictions )
print( sprintf("Initial error rate= %10.3f", er) )

# Train networks for an increasing number of iterations and record the training
# and testing error rates at each stopping point (a learning curve):
#
max_iterations = 10
error_curve_training = c()
error_curve_testing = c()
for( ii in 1:max_iterations ){
  xor_net_ii = nnet( X, Y, size=2, Wts=rep( 0.1, 12 ), maxit=ii, trace=FALSE )

  YPrediction = predict( xor_net_ii, X )
  er = test.cl( Y, YPrediction )
  error_curve_training = c( error_curve_training, er )

  YPrediction = predict( xor_net_ii, XTest )
  er = test.cl( YTest, YPrediction )
  error_curve_testing = c( error_curve_testing, er )
}

if( T ){
  postscript("../../WriteUp/Graphics/Chapter4/prob_2_learning_curves.eps", onefile=FALSE, horizontal=FALSE)
  matplot( 1:max_iterations, error_curve_training, type="b", pch=18,
           xlab="iteration", ylab="error rate",
           ylim=range( error_curve_training, error_curve_testing ) )
  matplot( 1:max_iterations, error_curve_testing, type="b", pch=19, add=T )
  legend( round(0.6*max_iterations), y=0.5,
          legend=c( "training error rate", "testing error rate" ), pch=c(18,19) )
  dev.off()
}

# Assume one of the last nets is optimal:
#
YPrediction = predict( xor_net_ii, XTest )
er = test.cl( YTest, YPrediction )
print( sprintf("Testing error rate= %10.3f", er) )