% % Written by: % -- % John L. Weatherwax 2007-12-07 % % email: wax@alum.mit.edu % % Please send comments and especially bug reports to the % above email address. % %----- % the number of experiments: N_EXPS = 100; %N_EXPS = 1000; % the number of learning episodes (for complete learning this would need to be much larger): nEpisodes = 20; % the number of non-terminal states in this Markov chain: n_rw_states = 5; % the true solution: Vtruth = 0.5 + (0.1:0.1:0.5); % a vector of learning parameters with 0.0 and 1.0 %alphaV = linspace( 0, 0.6, 10 ); alphaV = linspace( 0, 0.6, 100 ); lambda = 0.9; results_acc=zeros(length(alphaV),N_EXPS); results_rep=zeros(length(alphaV),N_EXPS); fprintf('working on experiment number (of %d): ',N_EXPS); for ei=1:N_EXPS, if( mod(ei,10)==0 ) fprintf('%d, ',ei); end for ai=1:length(alphaV), a = alphaV(ai); %fprintf('working on alpha=%f...\n',a); V_w_at = eg_7_5_learn_at( lam, a, n_rw_states, nEpisodes ); results_acc(ai,ei) = sqrt( mean( (V_w_at-Vtruth).^2 ) ); V_w_rp = eg_7_5_learn_rt( lam, a, n_rw_states, nEpisodes ); results_rep(ai,ei) = sqrt( mean( (V_w_rp-Vtruth).^2 ) ); end end fprintf('\n'); results_acc = mean( results_acc, 2 ); results_rep = mean( results_rep, 2 ); figure; hold on; ph1=plot( alphaV, results_acc, '-xr' ); hold on; ph2=plot( alphaV, results_rep, '-og' ); grid on; xlabel( 'alpha' ); ylabel( ['average rms error from ',num2str(nEpisodes), ' episodes'] ); legend( [ph1,ph2], {'Accumulating Traces','Replacing Traces'}, 'Location', 'Best' ); axis( [ 0.0, alphaV(end), 0.0, 1 ] ); saveas(gcf,['./eg_7_5_results_',num2str(N_EXPS)],'png'); return;