# # Written by: # -- # John L. Weatherwax 2009-04-21 # # email: wax@alum.mit.edu # # Please send comments and especially bug reports to the # above email address. # # EPage 386 # #----- save_plots = F if( ! require("ISLR") ){ install.packages("ISLR") } if( ! require("MASS") ){ install.packages("MASS") } if( ! require("class") ){ install.packages("class") } set.seed(0) vt100ClearScreen <- function(...) cat("\033[2J") vt100ClearScreen() # Part (a): Direction = Weekly$Direction Weekly$Direction = NULL Weekly$NumericDirection = as.numeric( Direction ) # Maps Down=>1 and Up=>2 Weekly$NumericDirection[ Weekly$NumericDirection==1 ] = -1 # Maps Down=>-1 and Up=>2 Weekly$NumericDirection[ Weekly$NumericDirection==2 ] = +1 # Maps Down=>-1 and Up=>+1 # Look at the correlation between the output and the input lags: # Weekly.cor = cor(Weekly) # b: logistic regression to predict Direction as a function of 5 lag variables + volume: # Weekly$NumericDirection = NULL Weekly$Direction = Direction five_lag_model = glm( Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume, data=Weekly, family=binomial ) print( summary(five_lag_model) ) print( contrasts( Weekly$Direction ) ) # c: the confusion matrix: # p_hat = predict( five_lag_model, newdata=Weekly, type="response" ) y_hat = rep( "Down", length(p_hat) ) y_hat[ p_hat > 0.5 ] = "Up" CM = table( predicted=y_hat, truth=Weekly$Direction ) print( CM ) print( sprintf( "LR (all features): overall fraction correct= %10.6f", ( CM[1,1] + CM[2,2] ) / sum(CM) ) ) # d: logistic regression using only Lag2 as the predictor (since it is the most significant predictor) # Weekly.train = ( Weekly$Year >= 1990 ) & ( Weekly$Year <= 2008 ) # our training set Weekly.test = ( Weekly$Year >= 2009 ) # our testing set lag2_model = glm( Direction ~ Lag2, data=Weekly, family=binomial, subset=Weekly.train ) # CM on test data : # p_hat = predict( lag2_model, newdata=Weekly[Weekly.test,], type="response" ) y_hat = rep( "Down", length(p_hat) ) y_hat[ p_hat > 0.5 ] = "Up" CM = table( predicted=y_hat, truth=Weekly[Weekly.test,]$Direction ) print( CM ) print( sprintf( "LR (only Lag2): overall fraction correct= %10.6f", ( CM[1,1] + CM[2,2] ) / sum(CM) ) ) #e: Use LDA # lda.fit = lda( Direction ~ Lag2, data=Weekly, subset=Weekly.train ) lda.predict = predict( lda.fit, newdata=Weekly[Weekly.test,] ) CM = table( predicted=lda.predict$class, truth=Weekly[Weekly.test,]$Direction ) print( CM ) print( sprintf( "LDA (only Lag2): overall fraction correct= %10.6f", ( CM[1,1] + CM[2,2] ) / sum(CM) ) ) #f: Use QDA # qda.fit = qda( Direction ~ Lag2, data=Weekly, subset=Weekly.train ) qda.predict = predict( qda.fit, newdata=Weekly[Weekly.test,] ) CM = table( predicted=qda.predict$class, truth=Weekly[Weekly.test,]$Direction ) print( CM ) print( sprintf( "QDA (only Lag2): overall fraction correct= %10.6f", ( CM[1,1] + CM[2,2] ) / sum(CM) ) ) #g: KNN # X.train = data.frame( Lag2=Weekly[ Weekly.train, ]$"Lag2" ) Y.train = Weekly[ Weekly.train, ]$"Direction" X.test = data.frame( Lag2=Weekly[ Weekly.test, ]$"Lag2" ) y_hat_k_1 = knn( X.train, X.test, Y.train, k=1, use.all=FALSE ) CM = table( predicted=y_hat_k_1, truth=Weekly[ Weekly.test, ]$Direction ) print( CM ) print( sprintf( "KNN (k=1): overall fraction correct= %10.6f", ( CM[1,1] + CM[2,2] ) / sum(CM) ) ) y_hat_k_3 = knn( X.train, X.test, Y.train, k=3, use.all=FALSE ) CM = table( predicted=y_hat_k_3, truth=Weekly[ Weekly.test, ]$Direction ) print( CM ) print( sprintf( "KNN (k=3): overall fraction correct= %10.6f", ( CM[1,1] + CM[2,2] ) / sum(CM) ) )