if(!requireNamespace('lattice', quietly = TRUE)){ install.packages('lattice', dependencies=TRUE, repos='http://cran.rstudio.com/') } library(lattice) BB = read.table('../../Data/beaniebaby.txt', header=FALSE, skip=0) colnames(BB) = c('Name', 'Dollar_Value', 'Retired', 'Year') if( file.exists('~/Software/RVAideMemoire/R/mood.medtest.R') ){ source('~/Software/RVAideMemoire/R/mood.medtest.R') } save_plots = FALSE set.seed(12345) BB = read.table('../../Data/beaniebaby.txt', header=FALSE, skip=0) colnames(BB) = c('Name', 'Dollar_Value', 'Retired', 'Year') BBSRetired = as.factor(BB$Retired) ##print(head(BB)) histogram(~ Dollar_Value | Retired, layout=c(1, 2), data=BB) ## Find the outliers: ## bb_mean_price = mean(BB$Dollar_Value) bb_std_price = sd(BB$Dollar_Value) mask = BB$Dollar_Value > bb_mean_price + 3*bb_std_price expensiveBB = BB[mask, ] expensiveBB = expensiveBB[order(expensiveBB$Dollar_Value, decreasing=TRUE), ] print(expensiveBB) print(sprintf('tmean(Dollar_Value)= %f', mean(BB$Dollar_Value, trim=0.05))) ## Compare the two means with a t-test: ## retired_values = BB[BB$Retired==1,]$Dollar_Value active_values = BB[BB$Retired==0,]$Dollar_Value res = t.test(retired_values, active_values) print(res) ## Use a nonparametric method: ## res = mood.medtest(Dollar_Value ~ Retired, data=BB) print('Median Test:') print(res) ## Use the rank-sum test: ## res = wilcox.test(Dollar_Value ~ Retired, data=BB) print('Wilcox Test:') print(res) ## Try to build a linear model: ## m1 = lm(Dollar_Value ~ Year, data=BB) print(summary(m1)) if( save_plots ){ postscript('../../WriteUp/Graphics/Chapter7/beanie_baby_models.eps', onefile=FALSE, horizontal=FALSE) } retired = BB$Retired==1 not_retired = BB$Retired==0 plot(jitter(BB[retired, ]$Year), BB[retired, ]$Dollar_Value, type='p', col='red', xlab='Year', ylab='Dollar_Value') lines(jitter(BB[not_retired, ]$Year), BB[not_retired, ]$Dollar_Value, type='p', col='black') x = jitter(BB$Year) y = jitter(BB$Dollar_Value) lines(loess.smooth(x, y, span=0.5), lwd=1, col='blue') abline(m1) grid() if( save_plots ){ dev.off() } ## ## Look at a model of the form: Dollar_Value = beta_0 + beta_1 I(Retired==1) + beta_2 Year ## m2 = lm(Dollar_Value ~ Year + Retired, data=BB) print(summary(m2))