#
# Written by:
# --
# John L. Weatherwax                2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
#-----
#
# Purpose: regression analysis of wine prices (EPage 140). The script reads
# the wine data set, converts the two-digit vintage codes to calendar years,
# and then fits a sequence of linear models of price on age, rating points
# and their interaction. Results are printed with summary(); plots are drawn
# on the current device or, when save_plots is TRUE, written to EPS files.

# Set to TRUE to write the figures to EPS files instead of the screen device.
save_plots <- FALSE

#
# EPage 140
#
DF <- read.csv("../../Data/wine.csv", header = TRUE)

# Convert the two-digit vintage codes to century based year indexing,
# i.e. 1999, 2001 etc: codes <= 5 become 20xx and codes >= 80 become 19xx.
# Codes strictly between 5 and 80 are left unchanged.
#
# Both masks are computed BEFORE any value is modified so that a year already
# shifted into the 2000s is not matched again by the ">= 80" test.
#
# The column is addressed by name (the masks are built from Vintage_Year, so
# the update must hit the same column regardless of the CSV column order).
#
# (A disabled alternative in the original subtracted 100 from codes > 50 so
# that the year 2000 became the reference year.)
inds_le5 <- DF$Vintage_Year <= 5
inds_ge80 <- DF$Vintage_Year >= 80
DF$Vintage_Year[inds_le5] <- DF$Vintage_Year[inds_le5] + 2000
DF$Vintage_Year[inds_ge80] <- DF$Vintage_Year[inds_ge80] + 1900

# Part 1:
#
# The difference in prices between wines ten or more years old and younger
# wines (relative to the 2005 date on which these prices were quoted).
#
older_than_ten_years <- DF$Vintage_Year <= 1995

# An indicator variable: 1 when the vintage is 1995 or earlier, 0 otherwise.
DF$Older_Than_Ten <- 0.
DF$Older_Than_Ten[older_than_ten_years] <- 1.

# Build a linear model to see if there is an effect of the age indicator,
# on both the raw and the log price scale:
summary(lm(Price ~ Older_Than_Ten, data = DF))
summary(lm(log(Price) ~ Older_Than_Ten, data = DF))

# Part 2:
#
# Correlation between rating points and price for each combination of
# raw / log-transformed variables.
#
corr1 <- cor(DF$Rating_Points, DF$Price)
corr2 <- cor(log(DF$Rating_Points), DF$Price)
corr3 <- cor(DF$Rating_Points, log(DF$Price))
corr4 <- cor(log(DF$Rating_Points), log(DF$Price))
print("Correlations between Rating_Points and Price (transformed or not)")
print(c(corr1, corr2, corr3, corr4))

if (save_plots) {
  postscript("../../WriteUp/Graphics/Chapter6/mow_plot_each_model.eps", onefile=FALSE, horizontal=FALSE)
}

# Side-by-side scatter plots (raw scale, log-log scale), each with its
# least-squares line overlaid.
par(mfrow = c(1, 2))
plot(DF$Rating_Points, DF$Price)
abline(lm(Price ~ Rating_Points, data = DF))
plot(log(DF$Rating_Points), log(DF$Price))
abline(lm(log(Price) ~ log(Rating_Points), data = DF))
par(mfrow = c(1, 1))

if (save_plots) {
  dev.off()
}

# Part 3 (lets take the logarithmic transformation of both price and rating
# points):
#
DF$LPrice <- log(DF$Price)
DF$LRating_Points <- log(DF$Rating_Points)

m <- lm(LPrice ~ LRating_Points + Vintage_Year, data = DF)
summary(m)

# Plot the residuals of this reduced model (residuals vs. fitted values):
#
if (save_plots) {
  postscript("../../WriteUp/Graphics/Chapter6/mow_residual_plot.eps", onefile=FALSE, horizontal=FALSE)
}
plot(m, which = 1)
if (save_plots) {
  dev.off()
}

# Inspect three specific rows by position.
# NOTE(review): presumably these are the points labelled in the residual
# plot above; the indices are hard-coded and tied to this data file.
print(DF[c(89, 85, 64), ])

# Drop the $1400 bottle (row 89) and refit:
DF2 <- DF[-c(89), ]
m2 <- lm(LPrice ~ LRating_Points + Vintage_Year, data = DF2)
summary(m2)

# Part 4 (Look for interaction terms)
#
# NOTE(review): these models are fit on the full DF (row 89 included), not
# on DF2 -- confirm that this is intended.
#
m3 <- lm(LPrice ~ LRating_Points + Vintage_Year + LRating_Points:Vintage_Year, data = DF)
summary(m3)

# Convert the vintage year to a relative year (number of years before 2005):
# As this is just a linear transformation of Vintage_Year it won't affect the
# significance of this predictor (but the predictor might be easier to
# understand in the model).
#
DF$Relative_Vintage_Year <- 2005 - DF$Vintage_Year

m4 <- lm(LPrice ~ LRating_Points + Relative_Vintage_Year + LRating_Points:Relative_Vintage_Year, data = DF)
summary(m4)