source('utils.R')

set.seed(12345)

DF_orig = load_table_11_5_data()

# The Additional Treatment Time data given in this exercise:
# 
DF = data.frame( 
    time=c( 6, 6, 7, 9, 10, 11, 13, 15, 16, 19, 20, 22, 23, 32 ),
    age=c( 67, 65, 58, 56, 49, 61, 62, 50, 67, 50, 55, 58, 47, 52 )
)
DF$deceased = 1

DF$deceased[c(2, 4, 5, 6, 8, 10, 11, 14)] = 0 # patient is still alive
DF_new = DF

# Apply a Monte-Carlo permutation test on this data to see if there is a difference between the medians:
# 
X = median(DF_new$time) - median(DF_orig$time) # our original statistic

n_orig = dim(DF_orig)[1]
n_new = dim(DF_new)[1]
data = c( DF_new$time, DF_orig$time )

B = 500
perm_stats = rep(NA, B)
for( bi in 1:B ){
    indx = sample.int( length(data), n_new )
    new_boot = data[indx]
    orig_boot = data[-indx]

    perm_stats[bi] = median(new_boot) - median(orig_boot)
}

hist( perm_stats, breaks=20,
     xlab="median(Time) (new treatment) - median(Time) (old treatment)",
     main="Possible Values for the Test Statistic under the Null Hypothesis" )
abline( v=X, col='red' )

# What is the probability of getting the value obtained (or a larger one):
#
alpha = sum( perm_stats >= X ) / length( perm_stats )
print( sprintf("alpha (prob. type I error): %10.6f", alpha) )