# Monte Carlo approximation of a value function over a short chain of states.
#
# Each iteration picks one non-terminal state at random, draws a uniform
# offer on [500, 600], and smooths the estimate for that state towards
# max(offer, current estimate of the next state).  The last state is held
# fixed at 550 and is never updated.  The resulting estimates are plotted
# against the state index.

N <- 1000000 # number of MC simulations

set.seed(12345)

n_states <- 10

# Value estimates; only the terminal entry starts with a known value.
V_bar <- rep(0, n_states)
V_bar[n_states] <- 550

# How many times each state has been visited (diagnostic only; it is not
# used in the step-size rule below).
n_samples <- rep(0, n_states)

for (n in seq_len(N)) {
  # Select a state to update (the terminal state n_states is excluded).
  st <- sample.int(n_states - 1, 1)

  # Sample an offer and take the better of the offer and the estimated
  # value of the next state.
  p_hat <- runif(1, min = 500, max = 600)
  v_hat <- max(p_hat, V_bar[st + 1])

  # Update our approximation.  The step size depends on the global
  # iteration counter n, not on the number of visits to this state.
  alpha <- 5 / (5 + n - 1)
  V_bar[st] <- (1 - alpha) * V_bar[st] + alpha * v_hat
  n_samples[st] <- n_samples[st] + 1
}

#postscript("../../WriteUp/Graphics/Chapter4/chap_4_prob_14.eps", onefile=FALSE, horizontal=FALSE)
plot(V_bar, type = "l", xlab = "time", ylab = "V(i)", pch = 19, cex = 1.5)
grid()
#dev.off()