## Read the cancer data file into a data frame of doubles.
##
## Records are read from the top of the file up to (but not including) the
## first empty line, or to the end of the file if there is none. Every
## occurrence of '??' is treated as a missing value (NA). Each record is
## parsed on its own, so records may carry a varying number of trailing
## fields; only the first 11 whitespace-separated fields are kept.
##
## Arguments:
##   path  Location of the data file. Defaults to the location originally
##         hard-coded in this function.
##
## Returns:
##   A data.frame with one row per record and the double columns
##   age, pop47, d47, pop51, d51, pop56, d56, pop61, d61, pop66, d66.
##   A file with no records yields a zero-row data frame with the same columns.
##   Rows containing NA are kept (the complete.cases filter is left disabled).
get_cancer_data <- function(path = '~/Projects/Reading/N_Z/Zelterman/Data/CANCER.DAT') {
  ## pick names that match the names used in the book for this exercise
  column_names <- c('age',
                    'pop47', 'd47',
                    'pop51', 'd51',
                    'pop56', 'd56',
                    'pop61', 'd61',
                    'pop66', 'd66')
  n_keep <- length(column_names)

  ## readLines() on a path opens and closes its own connection, so nothing
  ## is left open if parsing fails further down.
  lines <- readLines(path)
  lines <- gsub('\\?\\?', 'NA', lines)

  ## Stop at the first empty line, as the line-by-line reader used to do.
  first_empty <- match(TRUE, !nzchar(lines))
  if (!is.na(first_empty)) {
    lines <- lines[seq_len(first_empty - 1)]
  }

  ## Parse each record separately and bind once at the end, instead of
  ## growing the result with rbind() on every iteration.
  rows <- lapply(lines, function(record) {
    fields <- read.table(text = record, stringsAsFactors = FALSE)
    as.matrix(fields[, seq_len(n_keep)])
  })

  if (length(rows) > 0) {
    values <- do.call(rbind, rows)
  } else {
    values <- matrix(double(0), nrow = 0, ncol = n_keep)
  }

  ## stringsAsFactors = FALSE keeps stray text as character so that the
  ## as.double() below yields NA rather than factor codes on R < 4.0.
  data <- data.frame(values, stringsAsFactors = FALSE)
  colnames(data) <- column_names
  data[] <- lapply(data, as.double)

  ##data = data[complete.cases(data),] ## drop any NAs
  data
}