% 
% Written by:
% -- 
% John L. Weatherwax                2007-12-07
% 
% email: wax@alum.mit.edu
% 
% Please send comments and especially bug reports to the
% above email address.
% 
%-----

% ---- experiment configuration --------------------------------------------

% How many independent experiments are averaged for every
% (lambda, alpha) combination:
N_EXPS = 100;

% Learning episodes per experiment (kept small on purpose; learning the
% values completely would require many more episodes):
nEpisodes = 10; 

% Number of non-terminal random walk states (the two terminal states are
% not counted).  Other sizes that have been tried: 17 and 7.
%n_rw_states = 17; 
%n_rw_states = 7; 
n_rw_states = 19; 

% True state values, one entry per non-terminal state, built directly over
% the interior indices 1..n_rw_states.
%  (variant for a reward of zero when stepping off the left end):
%Vtruth = (1:n_rw_states)/(n_rw_states+1); 
%  (variant for a reward of -1 when stepping off the left end):
Vtruth = -1 + 2*(1:n_rw_states)/(n_rw_states+1); 

% Step sizes (alpha) to sweep: evenly spaced and including both end points
% 0.0 and 1.0.  Coarser grids that have been tried: 10 and 20 points.
%alphaV = linspace( 0, 1, 10 ); 
%alphaV = linspace( 0, 1, 20 );
alphaV = linspace( 0, 1, 100 );

% The TD(lambda) trace-decay values to compare:
lamVect = [ 0, 0.2, 0.4, 0.6, 0.8, 0.9, 0.95, 0.975, 0.99, 1.0 ]; 

% ---- sweep over every (lambda, alpha) pair --------------------------------
% results(i,j) holds, for step size alphaV(i) and trace decay lamVect(j),
% the RMS difference between the learned values and Vtruth, averaged over
% N_EXPS independent calls to rw_online_tdl_learn.
numAlphas  = length(alphaV);
numLambdas = length(lamVect);
results    = zeros(numAlphas,numLambdas);
for lamIdx = 1:numLambdas
  lambda = lamVect(lamIdx);
  fprintf('working on lambda=%f...\n',lambda);
  for alphaIdx = 1:numAlphas
    stepSize = alphaV(alphaIdx);
    fprintf('working on alpha=%f...\n',stepSize);
    % one RMS error per experiment for this (lambda, alpha) pair:
    rmsPerRun = zeros(1,N_EXPS);
    fprintf('working on experiment number (of %d): ',N_EXPS);
    for runIdx = 1:N_EXPS
      % progress marker every tenth experiment:
      if mod(runIdx,10)==0
        fprintf('%d, ',runIdx);
      end
      Vlearned = rw_online_tdl_learn( lambda, stepSize, n_rw_states, nEpisodes );
      valueErr = Vlearned - Vtruth;
      rmsPerRun(runIdx) = sqrt( mean( valueErr.^2 ) );
    end
    fprintf('\n');
    % store the experiment-averaged error directly in its table cell:
    results(alphaIdx,lamIdx) = mean( rmsPerRun );
  end
end

% ---- plot one error-vs-alpha curve per lambda and save the figure ---------
figure;
hold on;
phs = plot( alphaV, results );   % one line handle per column of results
xlabel( 'alpha' );
yText = ['average rms error from ',num2str(nEpisodes), ' episodes'];
ylabel( yText );

% one legend entry per lambda value, in the same order as the plotted columns:
legendLabels = arrayfun( @(lamValue) ['TD(\lambda=',num2str(lamValue),')'], ...
                         lamVect, 'UniformOutput', false );
%legend(phs,legendLabels,'Location','Best');
legend(phs,legendLabels,'Location','North');

% fixed viewing window: the full alpha range, errors between 0.1 and 0.55
axis( [ 0.0, alphaV(end), 0.1, 0.55 ] );

% write the figure as a PNG whose name carries the number of experiments:
outName = ['./rw_online_tdl_learn_',num2str(N_EXPS)];
saveas(gcf,outName,'png');

% End of the experiment: stop the script here.  Because of this return,
% nothing below is executed when the file is run from top to bottom.
return; 

% Scratch snippet kept for manual use only (unreachable in a normal run).
% a simple point case to run: 
% NOTE(review): presumably rw_online_ntd_learn is a script that reads n and
% alpha from the workspace -- its source is not visible here, confirm.
n = 3; 
alpha = 0.1; 
rw_online_ntd_learn