#
# Utilities to load the data into R from Hocking's book
# METHODS AND APPLICATIONS OF LINEAR MODELS (second edition):
#
# Data for the book was found here:
#
# http://www.filewatcher.com/b/ftp/ftp.wiley.com/public/sci_tech_med/hocking-0.html
#
# Get the file: HOCKING2.ZIP for the second edition
#
# Every loader that reads a file takes an optional `directory` argument that
# defaults to the global `data_dir` below, so existing zero-argument calls
# keep working while other locations can be supplied per call.  All loaders
# return their data frame visibly.
#

# Default location of the unpacked data files.
data_dir <- "/home/wax/Projects/Reading/G_M/Hocking/Data/Hocking2"

# Private helper: read `file_name` from `directory` with read.table(); any
# further arguments (header, fill, skip, ...) are forwarded unchanged.
.read_hocking_file <- function(file_name, directory, ...) {
  read.table(file.path(directory, file_name), ...)
}

# Private helper: read a file that has a header line and remove the column
# named `column` (a no-op when that column is absent).
.read_without_column <- function(file_name, directory, column) {
  tbl <- .read_hocking_file(file_name, directory, header = TRUE)
  tbl[[column]] <- NULL
  tbl
}

# Private helper: read a "wide" file (first line skipped, short lines padded
# with NA, first column discarded) whose remaining columns are one id column
# followed by `n_values` value columns, and stack it into a "long" data frame
# with the columns `id_col` and `value_prefix`, rows renumbered from 1.
.read_wide_as_long <- function(file_name, directory, id_col, value_prefix,
                               n_values) {
  wide <- .read_hocking_file(
    file_name, directory,
    header = FALSE, fill = TRUE, skip = 1
  )
  wide$V1 <- NULL
  value_cols <- paste(value_prefix, seq_len(n_values), sep = "_")
  colnames(wide) <- c(id_col, value_cols)

  long <- reshape(
    wide,
    varying = value_cols, timevar = "order",
    direction = "long", idvar = id_col, sep = "_"
  )
  long$order <- NULL
  # seq_len() instead of 1:length(...) so a zero-row result does not yield
  # the bogus sequence c(1, 0).
  rownames(long) <- seq_len(nrow(long))
  long
}

# Returns a data frame with the columns `pressure` and `boiling_point`
# (17 rows, values embedded in the source).
load_forbes_data <- function() {
  data.frame(
    pressure = c(
      20.79, 20.79, 22.40, 22.67, 23.15, 23.35, 23.89, 23.99, 24.02,
      24.01, 25.14, 26.57, 28.49, 27.76, 29.04, 29.88, 30.66
    ),
    boiling_point = c(
      194.5, 194.3, 197.9, 198.4, 199.4, 199.9, 200.9, 201.1, 201.4,
      201.3, 203.6, 204.6, 209.5, 208.6, 210.7, 211.9, 212.2
    )
  )
}

# Returns a data frame with the columns `temperature` (40, 45, ..., 65, each
# repeated three times) and `strength` (18 rows, values embedded in the
# source).
load_particle_board_data <- function() {
  # Disabled alternative kept from the original source:
  #fn = file.path(data_dir, "APP-D-20.REV.DAT")
  #DF = read.table(fn, header=TRUE)
  data.frame(
    temperature = rep(c(40, 45, 50, 55, 60, 65), each = 3),
    strength = c(
      66.3, 64.84, 64.36,
      69.70, 66.26, 72.06,
      73.23, 71.40, 68.85,
      75.78, 72.57, 76.64,
      78.78, 77.37, 75.94,
      78.82, 77.13, 77.09
    )
  )
}

# Reads EXER2-31.REV.DAT and strips every "." from the column names.
#
# Note that the data that comes out of this file is:
#
# 1973 -> 73
# 1993 -> 93
# 2000 -> 100
#
# Thus for new data say x=2050 we return (x-1900)
#
load_exercise_2_31_data <- function(directory = data_dir) {
  tbl <- .read_hocking_file("EXER2-31.REV.DAT", directory, header = TRUE)
  colnames(tbl) <- gsub(".", "", colnames(tbl), fixed = TRUE)
  tbl
}

# Reads EXER2-32.REV.DAT and drops the CASE column.
load_exercise_2_32_data <- function(directory = data_dir) {
  .read_without_column("EXER2-32.REV.DAT", directory, "CASE")
}

# Reads EXER3-4.REV.DAT as a "wide" table (X, Y_1, Y_2, Y_3) and returns it
# in "long" form with the columns X and Y.
load_exercise_3_4_data <- function(directory = data_dir) {
  .read_wide_as_long("EXER3-4.REV.DAT", directory, "X", "Y", 3)
}

# Reads EXER3-7.REV.DAT as a "wide" table (AGE, P_1, ..., P_5) and returns it
# in "long" form with the columns AGE and P.
load_exercise_3_7_data <- function(directory = data_dir) {
  .read_wide_as_long("EXER3-7.REV.DAT", directory, "AGE", "P", 5)
}

# Reads EXER3-8.REV.DAT as a "wide" table (X, Y_1, Y_2, Y_3) and returns it
# in "long" form with the columns X and Y.
load_exercise_3_8_data <- function(directory = data_dir) {
  .read_wide_as_long("EXER3-8.REV.DAT", directory, "X", "Y", 3)
}

# Reads APP-D-1.REV.DAT and drops the CASE column.
load_appendix_survival_data <- function(directory = data_dir) {
  .read_without_column("APP-D-1.REV.DAT", directory, "CASE")
}

# Reads APP-D-2.REV.DAT (header line expected) and returns it unchanged.
load_appendix_cigarettes_data <- function(directory = data_dir) {
  .read_hocking_file("APP-D-2.REV.DAT", directory, header = TRUE)
}

# Reads APP-D-3.REV.DAT (header line expected) and returns it unchanged.
load_appendix_highway_fatality_data <- function(directory = data_dir) {
  .read_hocking_file("APP-D-3.REV.DAT", directory, header = TRUE)
}

# Reads APP-D-4.REV.DAT (header line expected) and returns it unchanged.
load_appendix_Indianapolis_500_data <- function(directory = data_dir) {
  .read_hocking_file("APP-D-4.REV.DAT", directory, header = TRUE)
}

# Reads APP-D-5.REV.DAT, drops the CASE column and moves the first remaining
# column to the end.
load_appendix_steel_production_data <- function(directory = data_dir) {
  tbl <- .read_without_column("APP-D-5.REV.DAT", directory, "CASE")
  tbl[, c(2, 3, 4, 1)] # order the variables: WID DENS STR PROD=response
}

# Reads APP-D-6.REV.DAT and drops the CASE column.
load_appendix_cement_data <- function(directory = data_dir) {
  .read_without_column("APP-D-6.REV.DAT", directory, "CASE")
}

# Reads APP-D-7.REV.DAT and moves the first column to the end.
load_appendix_gas_mileage_data <- function(directory = data_dir) {
  tbl <- .read_hocking_file("APP-D-7.REV.DAT", directory, header = TRUE)
  tbl[, c(2, 3, 4, 5, 1)] # order the variables: DISP HP WT TRAN MPG=response
}

# Reads APP-D-8.REV.DAT, applies the alphabetical ordering of the column
# names to both the rows and the columns, and labels the rows with the
# (reordered) column names.
# NOTE(review): this presumes the file holds a square table -- confirm.
load_appendix_lamb_data <- function(directory = data_dir) {
  tbl <- .read_hocking_file("APP-D-8.REV.DAT", directory, header = TRUE)
  name_order <- order(colnames(tbl))
  # order the column names as X1 X2 X3 X4 X5 X6 Y
  tbl <- tbl[name_order, name_order]
  rownames(tbl) <- colnames(tbl)
  tbl
}

# Reads APP-D-9.REV.DAT (header line expected) and returns it unchanged.
load_appendix_refinery_correlation_data <- function(directory = data_dir) {
  .read_hocking_file("APP-D-9.REV.DAT", directory, header = TRUE)
}

# Reads APP-D-10.REV.DAT.
#
# The data is stored in the file with line breaks thus we need to perform
# some special parsing to read it in: every logical row occupies two
# consecutive lines, contributing the first 9 fields of the first line and
# the first 5 fields of the second.  The result uses the header names for
# both its columns and its rows.
load_appendix_pitprop_data <- function(directory = data_dir) {
  raw <- .read_hocking_file(
    "APP-D-10.REV.DAT", directory,
    header = TRUE, fill = TRUE
  )
  n_rows <- nrow(raw) %/% 2
  n_cols <- ncol(raw)

  # Collect each logical row once instead of growing a vector with c().
  pieces <- lapply(seq_len(n_rows), function(i) {
    as.double(c(raw[2 * i - 1, 1:9], raw[2 * i, 1:5]))
  })

  result <- data.frame(
    matrix(unlist(pieces), nrow = n_rows, ncol = n_cols, byrow = TRUE)
  )
  colnames(result) <- colnames(raw)
  rownames(result) <- colnames(raw)
  result
}

# Reads APP-D-11.REV.DAT and drops the CASE column.
load_appendix_bodyfat_data <- function(directory = data_dir) {
  .read_without_column("APP-D-11.REV.DAT", directory, "CASE")
}

# Reads APP-D-12.REV.DAT and drops the CASE column.
load_appendix_academy_bodyfat_data <- function(directory = data_dir) {
  .read_without_column("APP-D-12.REV.DAT", directory, "CASE")
}

# Reads APP-D-13.REV.DAT and drops the CASE column.
load_appendix_dilemma_data <- function(directory = data_dir) {
  .read_without_column("APP-D-13.REV.DAT", directory, "CASE")
}

# Reads APP-D-14.REV.DAT and drops the CASE column.
load_appendix_stack_loss_data <- function(directory = data_dir) {
  .read_without_column("APP-D-14.REV.DAT", directory, "CASE")
}

# Reads APP-D-15.REV.DAT, drops the OBS column and names the remaining
# columns WATER, PROD and CAP.
#
# This is the raw refinery data:
#
load_appendix_refinery_data <- function(directory = data_dir) {
  tbl <- .read_without_column("APP-D-15.REV.DAT", directory, "OBS")
  colnames(tbl) <- c("WATER", "PROD", "CAP")
  tbl
}

# Reads APP-D-16.REV.DAT and drops the CASE column.
load_appendix_polymer_data <- function(directory = data_dir) {
  .read_without_column("APP-D-16.REV.DAT", directory, "CASE")
}

# Reads APP-D-17.REV.DAT and drops the OBS column.
load_appendix_Hookers_data <- function(directory = data_dir) {
  .read_without_column("APP-D-17.REV.DAT", directory, "OBS")
}

# Reads APP-D-18.REV.DAT, skipping its first two lines, names the columns
# CASE, RUN, PREC and YIELD, and drops CASE.
load_appendix_sediment_data <- function(directory = data_dir) {
  tbl <- .read_hocking_file(
    "APP-D-18.REV.DAT", directory,
    header = FALSE, skip = 2
  )
  colnames(tbl) <- c("CASE", "RUN", "PREC", "YIELD")
  tbl$CASE <- NULL
  tbl
}

# Reads APP-D-19.REV.DAT and drops the CASE column.
load_appendix_deforestation_data <- function(directory = data_dir) {
  .read_without_column("APP-D-19.REV.DAT", directory, "CASE")
}