library(survival) download_data = TRUE ## Read in the data: ## if( download_data ){ DF = read.table('http://lib.stat.cmu.edu/datasets/stanford', header=FALSE, skip=137, nrows=103, na.strings=c('NA', '-')) colnames(DF) = c('ID', 'DOB_Month', 'DOB_Day', 'DOB_Year', # DOB = date of birth 'DOA_Month', 'DOA_Day', 'DOA_Year', # DOA = date of accept 'DOT_Month', 'DOT_Day', 'DOT_Year', # DOT = date of transplant 'DLS_Month', 'DLS_Day', 'DLS_Year', # DLS = date last seen 'Dead', 'Prior', 'Mismatch_number', 'HLA_A2', 'Mismatch_Score', 'Reject') DF$ID = NULL } ## Compute: ## ## 1) the age at surgery and ## 2) how long the patient survived after the heart transplant ## DF$DOB = ISOdate(DF$DOB_Year, DF$DOB_Month, DF$DOB_Day) DF$DOT = ISOdate(DF$DOT_Year, DF$DOT_Month, DF$DOT_Day) DF$DLS = ISOdate(DF$DLS_Year, DF$DLS_Month, DF$DLS_Day) DF$Age = as.double(difftime (DF$DOT, DF$DOB, units='days'))/365 ## Age in years DF$Time = as.double(difftime (DF$DLS, DF$DOT, units='days'))/365 ## Last seen in years cc = complete.cases(DF[, c('Age', 'Time', 'Dead')]) print(head(DF[cc, c('Age', 'Time', 'Dead')])) ## Look at the hazard functions based on only the Age variable: ## res1 = coxph(Surv(Time, Dead) ~ Age, data=DF) print(res1) res2 = coxph(Surv(Time, Dead) ~ Age + I(Age^2), data=DF) print(res2) ## Look at how the hazard function depends with Age: ## age_range = range(DF$Age[complete.cases(DF$Age)]) print(age_range) ages = seq(age_range[1], age_range[2], length.out=100) haz_hat = predict(res2, newdata=data.frame(Age=ages)) ##postscript('../../WriteUp/Graphics/Chapter12/chap_12_shts_quadradic_hazard.eps', onefile=FALSE, horizontal=FALSE) plot(ages, haz_hat, type='l', xlab='Age', ylab='hazard (Age)') grid() ##dev.off()