#
# Written by:
# --
# John L. Weatherwax                2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
#-----

# Exploratory analysis of the Swiss birth-rate data: inspect the predictor
# distributions, log-transform High_Education, measure how correlated the
# variables are, then compare a full linear model for Fertility_Rate against
# several reduced models.

# Set to TRUE to also write the High_Education density plot to an EPS file.
save_plots <- FALSE

#
# EPage 140
#
DF <- read.csv("../../Data/Swiss_birth_rates.csv", header = TRUE)

# Use the province names as row labels, then drop the column so it is not
# picked up as a variable by cor(DF) or by the `~ .` model formula below.
rownames(DF) <- DF$Province_Name
DF$Province_Name <- NULL

# Look for skewed distributions of the input predictors
plot(density(DF$Pct_Agricultural)) # somewhat left skewed
plot(density(DF$High_Exams)) # somewhat right skewed

# Only this plot is optionally redirected to a file; the postscript device is
# opened just before the plot and closed right after it.
if (save_plots) {
  postscript(
    "../../WriteUp/Graphics/Chapter6/fris_high_education_plot.eps",
    onefile = FALSE, horizontal = FALSE
  )
}
plot(density(DF$High_Education)) # very skewed to the right
if (save_plots) {
  dev.off()
}

plot(density(DF$Pct_Catholic)) # bimodal
plot(density(DF$Infant_Mortality)) # skewed to the left

# Transform the education variable with logs; the raw column is removed so
# that only the transformed version enters the models below.
DF$LHigh_Education <- log(DF$High_Education)
DF$High_Education <- NULL

# Notice that this threshold splits the data in half:
# (explicit print() so the table is also shown when the file is source()d)
print(table(DF$Pct_Catholic < 15))

# Let's see which predictors are most correlated with fertility rate
# (they should be very correlated with each other also):
#
fr_cor <- cor(DF)
print(fr_cor)

# What is the average off-diagonal correlation?
# upper.tri() selects each variable pair once and excludes the diagonal.
mask <- upper.tri(fr_cor)
print(mean(abs(fr_cor[mask])))

# Let's get a baseline performance metric:
#
m_baseline <- lm(Fertility_Rate ~ ., data = DF)
print(summary(m_baseline))

# Apply step to see what model it suggests:
step(m_baseline)

# What predictor should I remove:
plot(DF$Pct_Catholic, DF$Fertility_Rate)
plot(DF$Pct_Agricultural, DF$Fertility_Rate)

# Each reduced model below is derived from the full baseline model (not from
# the previous reduced model), so the removals are independent of each other.

# Remove Pct_Catholic:
sm1 <- update(m_baseline, . ~ . - Pct_Catholic)
print(summary(sm1))

# Remove Pct_Agricultural:
sm2 <- update(m_baseline, . ~ . - Pct_Agricultural)
print(summary(sm2))

# Remove Pct_Catholic and Pct_Agricultural:
sm3 <- update(m_baseline, . ~ . - Pct_Catholic - Pct_Agricultural)
print(summary(sm3))

# Remove Pct_Catholic, Pct_Agricultural, and LHigh_Education:
sm4 <- update(
  m_baseline,
  . ~ . - Pct_Catholic - Pct_Agricultural - LHigh_Education
)
print(summary(sm4))