%
% Written by:
% --
% John L. Weatherwax    2007-12-07
%
% email: wax@alum.mit.edu
%
% Please send comments and especially bug reports to the
% above email address.
%
%-----

%close all;

% the number of experiments:
N_EXPS = 1000;

% the number of learning episodes (for complete learning this would need to be much larger):
nEpisodes = 10;

% the number of random walk states (not including the two terminal states):
%n_rw_states = 17;
n_rw_states = 19;

% the true solution:
% (when zero is the reward for going off the left end):
%Vtruth = (0:(n_rw_states+1))/(n_rw_states+1); Vtruth(1) = []; Vtruth(end) = [];
% (when -1 is the reward for going off the left end):
Vtruth = -1 + 2*(0:(n_rw_states+1))/(n_rw_states+1); Vtruth(1) = []; Vtruth(end) = [];

% a vector of learning rates (alpha):
%alphaV = linspace( 0, 0.3, 20 );
alphaV = linspace( 0, 0.3, 100 );
%alphaV = linspace( 0, 0.3, 10 );

% a vector of n-step TD lookahead lengths:
nVect = [ 1 2 3 4 6 8 15 30 60 100 300 1000 ];

figure; hold on;
results = zeros(length(alphaV), length(nVect));
for ni = 1:length(nVect)
  n = nVect(ni);
  fprintf('working on n=%d...\n', n);
  tra = zeros(1, length(alphaV));
  for ai = 1:length(alphaV)
    a = alphaV(ai);
    fprintf('working on alpha=%f...\n', a);
    % average the root-mean-square error in V over N_EXPS independent runs:
    tr = zeros(1, N_EXPS);
    for ei = 1:N_EXPS
      V = rw_offline_ntd_learn( n, a, n_rw_states, nEpisodes );
      tr(ei) = sqrt( mean( (V - Vtruth).^2 ) );
    end
    tra(ai) = mean( tr );
  end
  results(:, ni) = tra(:);
end

phs = plot( alphaV, results );
xlabel( 'alpha' );
ylabel( ['average rms error from ', num2str(nEpisodes), ' episodes'] );
nVectCL = {};
for ti = 1:length(nVect)
  nVectCL{end+1} = ['TD(n=', num2str(nVect(ti)), ')'];
end
legend( phs, nVectCL, 'Location', 'Best' );
axis( [ 0, alphaV(end), 0.2, 0.55 ] );
saveas( gcf, ['./rw_offline_ntd_learn_', num2str(N_EXPS)], 'png' );

return;

% a simple point case to run:
n = 3; alpha = 0.1;
V = rw_offline_ntd_learn( n, alpha, n_rw_states, nEpisodes );
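
%-----
% The helper rw_offline_ntd_learn is not defined in this file.  What follows
% is a minimal sketch of it, assuming it performs offline (batch, per-episode)
% n-step TD prediction on the random walk task implied by the driver above:
% n_rw_states non-terminal states, a reward of -1 for stepping off the left
% end, +1 off the right end, and zero otherwise (matching the Vtruth
% convention used above).  Only the four-argument signature is taken from the
% call above; the body and the center-start assumption are guesses, not the
% author's original code.  It would live in its own file,
% rw_offline_ntd_learn.m.
%-----

function V = rw_offline_ntd_learn( n, alpha, n_rw_states, nEpisodes )
  % RW_OFFLINE_NTD_LEARN - a sketch of offline n-step TD prediction on the
  % random walk task.  Updates are accumulated during an episode and applied
  % only once the episode ends ("offline"), so the value estimates V are held
  % fixed while the episode's returns are computed.

  V = zeros(1, n_rw_states);             % initial value estimates
  start_state = ceil( n_rw_states/2 );   % assumed: episodes begin in the center state

  for ep = 1:nEpisodes
    % -- generate one episode of the random walk --
    s = start_state;
    states = s;                          % states(t)  = state visited at time t
    rewards = [];                        % rewards(t) = reward received leaving states(t)
    while true
      if rand < 0.5, s = s - 1; else s = s + 1; end
      if s < 1
        rewards(end+1) = -1; break;      % fell off the left end
      elseif s > n_rw_states
        rewards(end+1) = +1; break;      % fell off the right end
      else
        rewards(end+1) = 0; states(end+1) = s;
      end
    end
    T = length(rewards);                 % episode length

    % -- compute each n-step return and accumulate the offline updates --
    dV = zeros(size(V));
    for t = 1:T
      tend = min(t + n - 1, T);          % last reward used in this return
      G = sum( rewards(t:tend) );        % undiscounted (gamma = 1) on this task
      if tend < T
        G = G + V(states(tend + 1));     % bootstrap from the state n steps ahead
      end
      dV(states(t)) = dV(states(t)) + alpha*( G - V(states(t)) );
    end
    V = V + dV;                          % apply the batched updates
  end
end

% Note: applying the increments inside the time loop (updating V as the
% episode unfolds) would give the online variant instead; the offline form
% above is what the "offline" in the function name suggests.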