% DO_MNT_CAR_EXPS - Performs the experiments presented in the book on the mountain valley problem
% which is linear, gradient-descent SARSA(\lambda) with binary features and an \epsilon-greedy policy
%
% For every (lambda, alpha) pair the learner is run N_MC independent times and the
% second output of mnt_car_learn is averaged over those runs.  The averages are then
% plotted against alpha (one curve per lambda) and the figure is saved as a PNG.
%
% Written by:
% --
% John L. Weatherwax                2008-02-19
%
% email: wax@alum.mit.edu
%
% Please send comments and especially bug reports to the
% above email address.
%
%-----

clc;
close all;
more off;

% the probability of an exploratory move:
epsilon = 0.0;

% discount factor:
gamma = 1.0;
%gamma = 0.9;

% the number of episodes to perform:
%nEpisodes = 3;
nEpisodes = 20;
%nEpisodes = 2000;

% the type of eligibility trace (replacing=0 or accumulating=1):
ACC_ET = 1;

% the number of monte carlo trials to perform ... for good statistics:
%N_MC = 2;
N_MC = 30;

% the number of step sizes (alpha values) sampled in the sweep:
nAlphaV = 5;

GetTiles_Mex_Script; % <- used just to make sure that the C code has been "mexed"

%--
% ACCUMULATING/REPLACING TRACES:
%--
lambdaV = [ 0.4, 0.7, 0.8, 0.9, 0.95, 0.99, 1.0 ];
alphaV  = linspace( 0, 1.2, nAlphaV );

% allResults(i,j) = Monte Carlo average for lambdaV(i) and alphaV(j)
allResults  = zeros( numel(lambdaV), numel(alphaV) );
% scratch buffer: one entry per Monte Carlo trial, fully overwritten for each (lambda,alpha)
imc_results = zeros( 1, N_MC );
% one legend entry per lambda
plt_lbls    = cell( 1, numel(lambdaV) );

for iLambda = 1:numel(lambdaV)
  lambda = lambdaV(iLambda);
  fprintf('working on lambda=%10.5f...\n', lambda);

  for iAlpha = 1:numel(alphaV)
    alpha = alphaV(iAlpha);
    fprintf(' working on alpha=%10.5f...\n', alpha);

    for iTrial = 1:N_MC
      % progress message every tenth trial
      if mod(iTrial, 10) == 0
        fprintf(' working on monte carlo=%10d...\n', iTrial);
      end
      % NOTE(review): atspe is presumably the average number of time steps per
      % episode (it is plotted as 'steps per episode'); the meaning of the
      % trailing 0 argument is not visible from this script -- confirm against
      % mnt_car_learn.
      [theta, atspe] = mnt_car_learn( nEpisodes, epsilon, gamma, alpha, lambda, ACC_ET, 0 );
      imc_results(iTrial) = atspe;
    end

    allResults(iLambda, iAlpha) = mean( imc_results );
  end

  plt_lbls{iLambda} = sprintf('lambda = %.2f', lambda);
end

% choose the title and the output file name according to the trace type
if ACC_ET == 0
  figTitle = 'replacing traces various lambdas';
  figFile  = sprintf('mc_learning_curves_replacing_et_mxts_ne_%d_nmc_%d', nEpisodes, N_MC);
else
  figTitle = 'accumulating traces various lambdas';
  figFile  = sprintf('mc_learning_curves_accumulating_et_mxts_ne_%d_nmc_%d', nEpisodes, N_MC);
end

% one curve per lambda (columns of the transposed result matrix), alpha on the x-axis
figure;
curveH = plot( alphaV, allResults.', '-' );
axis tight;
legend( curveH, plt_lbls );
xlabel('alpha');
ylabel('steps per episode');
title( figTitle );
saveas( gcf, figFile, 'png' );