library(reshape2)

## Read the mouse liver cancer data set and return it in long format.
##
## The input is a whitespace-delimited text file whose non-blank lines
## alternate:
##   odd lines : a months-on-study label followed by one number-developed
##               count per dose level (9 fields are used);
##   even lines: one number-exposed count per dose level (8 fields are used).
## Extra trailing fields are ignored and missing fields become NA.
## Blank (whitespace-only) lines are skipped before the odd/even pairing.
##
## Arguments:
##   fn  Path to the data file. Defaults to the location this function has
##       always read from, so existing zero-argument calls are unaffected.
##
## Returns a data.frame with one row per (months_on_study, dose) pair, ordered
## dose by dose, with columns:
##   months_on_study   ordered factor, levels in file order
##   dose_amt          numeric dose level
##   number_exposed    numeric count taken from the even lines
##   number_developed  numeric count taken from the odd lines
##
## Stops with an error if the file holds no data lines or an odd number of
## data lines.
read_mouse_liver_cancer_data <- function(fn = '../../Data/mouse-liver-cancer.dat') {
  dose <- c(0.0, 0.3, 0.35, 0.45, 0.6, 0.75, 1.0, 1.5) ##* 1.e-4
  n_dose <- length(dose)
  dose_header <- sprintf('dose_%d', seq_len(n_dose))

  ## Read everything at once and discard blank lines so that a stray empty
  ## line cannot shift the odd/even pairing below.
  rl <- readLines(fn, warn = FALSE)
  rl <- rl[nzchar(trimws(rl))]
  if (length(rl) == 0L) {
    stop("no data lines found in '", fn, "'", call. = FALSE)
  }
  if (length(rl) %% 2L != 0L) {
    stop("expected an even number of data lines in '", fn, "' but found ",
         length(rl), call. = FALSE)
  }

  tokens <- strsplit(trimws(rl), '\\s+')
  is_odd <- seq_along(tokens) %% 2L == 1L

  ## Keep the first k fields of every row (padding short rows with NA) and
  ## return them as a character matrix with one row per input line.
  take_fields <- function(rows, k) {
    t(vapply(rows, function(p) p[seq_len(k)], character(k)))
  }

  ## Odd lines carry the month label plus one count per dose; even lines carry
  ## only the counts.
  odd_rows <- take_fields(tokens[is_odd], n_dose + 1L)
  even_rows <- take_fields(tokens[!is_odd], n_dose)

  months_on_study <- factor(odd_rows[, 1], levels = odd_rows[, 1], ordered = TRUE)

  ## Convert our data to numbers:
  to_numeric_df <- function(m) {
    as.data.frame(
      matrix(as.numeric(m), nrow = nrow(m), dimnames = list(NULL, dose_header))
    )
  }
  NE <- to_numeric_df(even_rows)                      ## N(umber) E(xposed)
  ND <- to_numeric_df(odd_rows[, -1, drop = FALSE])   ## N(umber) D(eveloped) cancer

  NE$months_on_study <- months_on_study
  ND$months_on_study <- months_on_study

  ## Flatten our data to pass out:
  ##
  NE <- melt(NE, id.vars = c('months_on_study'),
             variable.name = 'dose_amt', value.name = 'number_exposed')
  ND <- melt(ND, id.vars = c('months_on_study'),
             variable.name = 'dose_amt', value.name = 'number_developed')

  ## Combine the two datasets, replacing melt's dose_<i> labels with the
  ## actual dose values (melt stacks the columns dose by dose).
  ##
  DF <- NE
  DF$dose_amt <- rep(dose, each = length(months_on_study), times = 1)
  DF$number_developed <- ND$number_developed

  DF
}