#
# Utilities to load the data into R from Hocking's book METHODS AND APPLICATIONS OF LINEAR MODELS (second edition):
#
# Data for the book was found here:
#
# http://www.filewatcher.com/b/ftp/ftp.wiley.com/public/sci_tech_med/hocking-0.html
#
# Get the file: HOCKING2.ZIP for the second edition
#
# Directory holding the extracted HOCKING2.ZIP data files that the loaders
# below read from.  Set the HOCKING_DATA_DIR environment variable to point at
# another copy of the data; when it is unset or empty the original hard-coded
# location is used.
data_dir <- local({
    override <- Sys.getenv("HOCKING_DATA_DIR", unset = "")
    if (nzchar(override)) override else "/home/wax/Projects/Reading/G_M/Hocking/Data/Hocking2"
})

# Build the Forbes data set from values embedded in this file.
#
# Returns a data frame with 17 rows and two numeric columns, `pressure` and
# `boiling_point`.  The data frame is returned visibly (the function no longer
# ends in an assignment), matching the loaders that end in a bare `DF`.
#
# NOTE(review): the classic Forbes data lists 30.06 for the last pressure
# reading; 30.66 is kept here as originally entered -- confirm against the
# table in the book.
load_forbes_data <- function() {
    data.frame(
        pressure = c(
            20.79, 20.79, 22.40, 22.67, 23.15, 23.35, 23.89, 23.99, 24.02,
            24.01, 25.14, 26.57, 28.49, 27.76, 29.04, 29.88, 30.66
        ),
        boiling_point = c(
            194.5, 194.3, 197.9, 198.4, 199.4, 199.9, 200.9, 201.1, 201.4,
            201.3, 203.6, 204.6, 209.5, 208.6, 210.7, 211.9, 212.2
        )
    )
}

# Build the particle-board data set from values embedded in this file.
#
# Returns a data frame with 18 rows: `temperature` takes the six levels
# 40, 45, ..., 65 (three rows each) and `strength` holds the matching
# measurements.  The data frame is returned visibly (the function no longer
# ends in an assignment), matching the loaders that end in a bare `DF`.
load_particle_board_data <- function() {
    # The original author left this file-based read disabled; presumably the
    # file holds the same table -- verify before switching to it.
    #fn = file.path(data_dir, "APP-D-20.REV.DAT")
    #DF = read.table(fn, header=TRUE)
    data.frame(
        temperature = rep(c(40, 45, 50, 55, 60, 65), each = 3),
        strength = c(
            66.3 , 64.84, 64.36,
            69.70, 66.26, 72.06,
            73.23, 71.40, 68.85,
            75.78, 72.57, 76.64,
            78.78, 77.37, 75.94,
            78.82, 77.13, 77.09
        )
    )
}

# Read EXER2-31.REV.DAT (with a header row) from `data_dir` and return it as
# a data frame whose column names have every "." removed.
#
# Per the original author's note, the year values in this file are stored as
# offsets from 1900:
#
#   1973 -> 73
#   1993 -> 93
#   2000 -> 100
#
# so a new year x (say x = 2050) has to be expressed as (x - 1900) before it
# is compared with this data.
load_exercise_2_31_data <- function() {
    path <- file.path(data_dir, "EXER2-31.REV.DAT")
    tbl <- read.table(path, header = TRUE)
    names(tbl) <- gsub(".", "", names(tbl), fixed = TRUE)
    tbl
}

# Read EXER2-32.REV.DAT (with a header row) from `data_dir`, drop the CASE
# column, and return the remaining columns as a data frame.
load_exercise_2_32_data <- function() {
    tbl <- read.table(file.path(data_dir, "EXER2-32.REV.DAT"), header = TRUE)
    tbl[["CASE"]] <- NULL
    tbl
}

# Read EXER3-4.REV.DAT from `data_dir` and return it in "long" format.
#
# The file is read without its first line (skip = 1) and with short lines
# padded by NA (fill = TRUE).  The first column is discarded and the remaining
# four are named X, Y_1, Y_2, Y_3.  The three Y columns are then stacked so the
# result has one row per (X, Y) pair, with columns `X` and `Y` and row names
# 1..n.  Cells that were padded with NA stay in the result as NA.
#
# NOTE(review): reshape() is given idvar = "X", which presumably requires each
# X value to occur on only one line of the file -- confirm against the data.
load_exercise_3_4_data <- function() {
    # Read the data as a "wide" data frame.
    fn <- file.path(data_dir, "EXER3-4.REV.DAT")
    wide <- read.table(fn, header = FALSE, fill = TRUE, skip = 1)
    wide$V1 <- NULL
    colnames(wide) <- c("X", "Y_1", "Y_2", "Y_3")

    # Reformat into a "long" data frame; the helper column recording which
    # Y_k a value came from is not needed afterwards.
    long <- reshape(
        wide,
        varying = c("Y_1", "Y_2", "Y_3"),
        timevar = "order", direction = "long", idvar = "X", sep = "_"
    )
    long$order <- NULL
    rownames(long) <- seq_len(nrow(long))
    long
}

# Read EXER3-7.REV.DAT from `data_dir` and return it in "long" format.
#
# The file is read without its first line (skip = 1) and with short lines
# padded by NA (fill = TRUE).  The first column is discarded and the remaining
# six are named AGE, P_1, ..., P_5.  The five P columns are then stacked so the
# result has one row per (AGE, P) pair, with columns `AGE` and `P` and row
# names 1..n.  Cells that were padded with NA stay in the result as NA.
#
# NOTE(review): reshape() is given idvar = "AGE", which presumably requires
# each AGE value to occur on only one line of the file -- confirm against the
# data.
load_exercise_3_7_data <- function() {
    # Read the data as a "wide" data frame.
    fn <- file.path(data_dir, "EXER3-7.REV.DAT")
    wide <- read.table(fn, header = FALSE, fill = TRUE, skip = 1)
    wide$V1 <- NULL
    colnames(wide) <- c("AGE", "P_1", "P_2", "P_3", "P_4", "P_5")

    # Reformat into a "long" data frame; the helper column recording which
    # P_k a value came from is not needed afterwards.
    long <- reshape(
        wide,
        varying = c("P_1", "P_2", "P_3", "P_4", "P_5"),
        timevar = "order", direction = "long", idvar = "AGE", sep = "_"
    )
    long$order <- NULL
    rownames(long) <- seq_len(nrow(long))
    long
}

# Read EXER3-8.REV.DAT from `data_dir` and return it in "long" format.
#
# The file is read without its first line (skip = 1) and with short lines
# padded by NA (fill = TRUE).  The first column is discarded and the remaining
# four are named X, Y_1, Y_2, Y_3.  The three Y columns are then stacked so the
# result has one row per (X, Y) pair, with columns `X` and `Y` and row names
# 1..n.  Cells that were padded with NA stay in the result as NA.
#
# NOTE(review): reshape() is given idvar = "X", which presumably requires each
# X value to occur on only one line of the file -- confirm against the data.
load_exercise_3_8_data <- function() {
    # Read the data as a "wide" data frame.
    fn <- file.path(data_dir, "EXER3-8.REV.DAT")
    wide <- read.table(fn, header = FALSE, fill = TRUE, skip = 1)
    wide$V1 <- NULL
    colnames(wide) <- c("X", "Y_1", "Y_2", "Y_3")

    # Reformat into a "long" data frame; the helper column recording which
    # Y_k a value came from is not needed afterwards.
    long <- reshape(
        wide,
        varying = c("Y_1", "Y_2", "Y_3"),
        timevar = "order", direction = "long", idvar = "X", sep = "_"
    )
    long$order <- NULL
    rownames(long) <- seq_len(nrow(long))
    long
}

# Read APP-D-1.REV.DAT (with a header row) from `data_dir`, drop the CASE
# column, and return the remaining columns as a data frame.
load_appendix_survival_data <- function() {
    survival <- read.table(file.path(data_dir, "APP-D-1.REV.DAT"), header = TRUE)
    survival[["CASE"]] <- NULL
    survival
}

# Read APP-D-2.REV.DAT (with a header row) from `data_dir` and return it as a
# data frame with all columns kept.  The result is returned visibly (the
# function no longer ends in an assignment), matching the loaders that end in
# a bare `DF`.
load_appendix_cigarettes_data <- function() {
    fn <- file.path(data_dir, "APP-D-2.REV.DAT")
    read.table(fn, header = TRUE)
}

# Read APP-D-3.REV.DAT (with a header row) from `data_dir` and return it as a
# data frame with all columns kept.  The result is returned visibly (the
# function no longer ends in an assignment), matching the loaders that end in
# a bare `DF`.
load_appendix_highway_fatality_data <- function() {
    fn <- file.path(data_dir, "APP-D-3.REV.DAT")
    read.table(fn, header = TRUE)
}

# Read APP-D-4.REV.DAT (with a header row) from `data_dir` and return it as a
# data frame with all columns kept.  The result is returned visibly (the
# function no longer ends in an assignment), matching the loaders that end in
# a bare `DF`.
load_appendix_Indianapolis_500_data <- function() {
    fn <- file.path(data_dir, "APP-D-4.REV.DAT")
    read.table(fn, header = TRUE)
}

# Read APP-D-5.REV.DAT (with a header row) from `data_dir`, drop the CASE
# column, and move the first remaining column to the end.  Per the original
# author's note the resulting order is WID DENS STR PROD, with PROD being the
# response.
load_appendix_steel_production_data <- function() {
    steel <- read.table(file.path(data_dir, "APP-D-5.REV.DAT"), header = TRUE)
    steel[["CASE"]] <- NULL
    # Predictors (columns 2-4) first, then the response (column 1).
    steel <- steel[, c(2:4, 1)]
    steel
}

# Read APP-D-6.REV.DAT (with a header row) from `data_dir`, drop the CASE
# column, and return the remaining columns as a data frame.
load_appendix_cement_data <- function() {
    cement <- read.table(file.path(data_dir, "APP-D-6.REV.DAT"), header = TRUE)
    cement[["CASE"]] <- NULL
    cement
}

# Read APP-D-7.REV.DAT (with a header row) from `data_dir` and move its first
# column to the end.  Per the original author's note the resulting order is
# DISP HP WT TRAN MPG, with MPG being the response.  The data frame is returned
# visibly (the function no longer ends in an assignment), matching the loaders
# that end in a bare `DF`.
load_appendix_gas_mileage_data <- function() {
    fn <- file.path(data_dir, "APP-D-7.REV.DAT")
    DF <- read.table(fn, header = TRUE)
    # Predictors (columns 2-5) first, then the response (column 1).
    DF[, c(2, 3, 4, 5, 1)]
}

# Read APP-D-8.REV.DAT (with a header row) from `data_dir` and return it with
# rows and columns both permuted into sorted column-name order (X1 X2 ... Y per
# the original author's note).  Row names are then set to the column names.
#
# NOTE(review): applying the same permutation to rows and columns assumes the
# file's rows are listed in the same order as its columns -- confirm.
load_appendix_lamb_data <- function() {
    lamb <- read.table(file.path(data_dir, "APP-D-8.REV.DAT"), header = TRUE)
    perm <- order(names(lamb))
    lamb <- lamb[perm, perm]
    row.names(lamb) <- names(lamb)
    lamb
}

# Read APP-D-9.REV.DAT (with a header row) from `data_dir` and return it as a
# data frame with all columns kept.  The result is returned visibly (the
# function no longer ends in an assignment), matching the loaders that end in
# a bare `DF`.
load_appendix_refinery_correlation_data <- function() {
    fn <- file.path(data_dir, "APP-D-9.REV.DAT")
    read.table(fn, header = TRUE)
}

# Read APP-D-10.REV.DAT from `data_dir` and return it as a square data frame
# whose row names and column names are both the names from the file's header.
#
# Each logical record in the file is broken across two physical lines, so the
# file is first read with fill = TRUE (short lines padded with NA) and the
# records are then stitched back together: the first 9 fields of every
# odd-numbered line followed by the first 5 fields of the next line, i.e. 14
# values per record.  If the file has an odd number of data lines, the trailing
# unpaired line is ignored.
load_appendix_pitprop_data <- function() {
    fn <- file.path(data_dir, "APP-D-10.REV.DAT")
    DF <- read.table(fn, header = TRUE, fill = TRUE)
    nrows <- nrow(DF) %/% 2   # two physical lines per record
    ncols <- ncol(DF)

    # One column of `values` per record; building it with vapply avoids
    # growing a vector inside a loop.
    values <- vapply(
        seq_len(nrows),
        function(k) as.double(c(DF[2 * k - 1, 1:9], DF[2 * k, 1:5])),
        numeric(14)
    )

    # as.vector() lays the records out one after another, which is the order
    # byrow = TRUE expects.
    D <- data.frame(matrix(as.vector(values), nrow = nrows, ncol = ncols, byrow = TRUE))
    colnames(D) <- colnames(DF)
    rownames(D) <- colnames(DF)
    D
}

# Read APP-D-11.REV.DAT (with a header row) from `data_dir`, drop the CASE
# column, and return the remaining columns as a data frame.
load_appendix_bodyfat_data <- function() {
    bodyfat <- read.table(file.path(data_dir, "APP-D-11.REV.DAT"), header = TRUE)
    bodyfat[["CASE"]] <- NULL
    bodyfat
}

# Read APP-D-12.REV.DAT (with a header row) from `data_dir`, drop the CASE
# column, and return the remaining columns as a data frame.
load_appendix_academy_bodyfat_data <- function() {
    academy <- read.table(file.path(data_dir, "APP-D-12.REV.DAT"), header = TRUE)
    academy[["CASE"]] <- NULL
    academy
}

# Read APP-D-13.REV.DAT (with a header row) from `data_dir`, drop the CASE
# column, and return the remaining columns as a data frame.
load_appendix_dilemma_data <- function() {
    dilemma <- read.table(file.path(data_dir, "APP-D-13.REV.DAT"), header = TRUE)
    dilemma[["CASE"]] <- NULL
    dilemma
}

# Read APP-D-14.REV.DAT (with a header row) from `data_dir`, drop the CASE
# column, and return the remaining columns as a data frame.
load_appendix_stack_loss_data <- function() {
    stack_loss <- read.table(file.path(data_dir, "APP-D-14.REV.DAT"), header = TRUE)
    stack_loss[["CASE"]] <- NULL
    stack_loss
}

# Read the raw refinery data from APP-D-15.REV.DAT (with a header row) in
# `data_dir`.  The OBS column is dropped and the three remaining columns are
# renamed, in file order, to WATER, PROD and CAP.
load_appendix_refinery_data <- function() {
    refinery <- read.table(file.path(data_dir, "APP-D-15.REV.DAT"), header = TRUE)
    refinery[["OBS"]] <- NULL
    names(refinery) <- c("WATER", "PROD", "CAP")
    refinery
}

# Read APP-D-16.REV.DAT (with a header row) from `data_dir`, drop the CASE
# column, and return the remaining columns as a data frame.
load_appendix_polymer_data <- function() {
    polymer <- read.table(file.path(data_dir, "APP-D-16.REV.DAT"), header = TRUE)
    polymer[["CASE"]] <- NULL
    polymer
}

# Read APP-D-17.REV.DAT (with a header row) from `data_dir`, drop the OBS
# column, and return the remaining columns as a data frame.
load_appendix_Hookers_data <- function() {
    hooker <- read.table(file.path(data_dir, "APP-D-17.REV.DAT"), header = TRUE)
    hooker[["OBS"]] <- NULL
    hooker
}

# Read APP-D-18.REV.DAT from `data_dir`, ignoring its first two lines and
# treating everything after them as data (no header row).  The four columns
# are named CASE, RUN, PREC and YIELD; CASE is then dropped and the remaining
# three columns are returned as a data frame.
load_appendix_sediment_data <- function() {
    sediment <- read.table(file.path(data_dir, "APP-D-18.REV.DAT"), header = FALSE, skip = 2)
    names(sediment) <- c("CASE", "RUN", "PREC", "YIELD")
    sediment[["CASE"]] <- NULL
    sediment
}

# Read APP-D-19.REV.DAT (with a header row) from `data_dir`, drop the CASE
# column, and return the remaining columns as a data frame.
load_appendix_deforestation_data <- function() {
    deforestation <- read.table(file.path(data_dir, "APP-D-19.REV.DAT"), header = TRUE)
    deforestation[["CASE"]] <- NULL
    deforestation
}