# Configuration and initial state for the Monte Carlo value estimation.

set.seed(12345) # fix the RNG stream so repeated runs give the same numbers

N <- 1e6 # number of MC simulations (iterations of the update loop)

n_states <- 10 # number of states; only states 1 .. n_states-1 get updated

# Current value estimate per state. Everything starts at zero except the
# last state, which is pinned to 550 -- the midpoint of the [500, 600] range
# the offers are drawn from (presumably the expected final offer; confirm).
V_bar <- numeric(n_states)
V_bar[n_states] <- 550

# Visit counter per state: how many times each state has been updated.
n_samples <- numeric(n_states)
# Stochastic-approximation sweep: on each of the N iterations, pick one
# state among 1 .. n_states-1 at random, draw an offer, and smooth the
# resulting observation into that state's value estimate.
#
# seq_len(N) is used rather than 1:N so that N = 0 performs no iterations
# (1:0 would iterate over c(1, 0) and apply two bogus updates).
for (n in seq_len(N)) {
    # select a state to update (the last state is never drawn, so it keeps
    # its initial value):
    #
    st <- sample.int(n_states - 1, 1)

    # sample an offer, uniform on [500, 600]:
    #
    p_hat <- runif(1, min = 500, max = 600)

    # observed value for this state: the larger of the sampled offer and the
    # current estimate for the next state (presumably "take the offer now"
    # versus "wait one more step" -- confirm against the problem statement).
    v_hat <- max(p_hat, V_bar[st + 1])

    # Update our approximation:
    #
    # The stepsize 5/(5+n-1) equals 1 on the first iteration and decays with
    # the global iteration counter n.
    # NOTE(review): n_samples[st] is maintained below but is not used in the
    # stepsize; if a per-state stepsize was intended, alpha would be based on
    # the visit count instead of n. Left as-is to preserve the results.
    alpha <- 5 / (5 + n - 1)
    V_bar[st] <- (1 - alpha) * V_bar[st] + alpha * v_hat
    n_samples[st] <- n_samples[st] + 1
}

# Draw the estimated value of each state as a line, then overlay a grid.
# To write the figure to the EPS file named below instead of the current
# graphics device, uncomment both the postscript() and dev.off() lines.
#postscript("../../WriteUp/Graphics/Chapter4/chap_4_prob_14.eps", onefile=FALSE, horizontal=FALSE)
plot(
    V_bar,
    type = "l",
    xlab = "time",
    ylab = "V(i)",
    pch = 19,
    cex = 1.5
)
grid()
#dev.off()