% WGW_W_KINGS_N_WIND_SCRIPT - Performs on-policy SARSA iterative action-value function estimation for the 
% windy grid world example, allowing king's (diagonal) moves.
% 
% Written by:
% -- 
% John L. Weatherwax                2007-12-03
% 
% email: wax@alum.mit.edu
% 
% Please send comments and especially bug reports to the
% above email address.
% 
%-----

% Driver: call wgw_w_kings_n_wind to obtain the action-value table Q, derive the
% greedy policy and state values from it, then plot and save the results.

close all;

% alpha is passed straight through to wgw_w_kings_n_wind:
alpha = 1e-1;

% number of grid rows and columns:
sideII = 7;
sideJJ = 10;

% the wind in each column:
wind = [ 0 0 0 1 1 1 2 2 1 0 ];

% the beginning and terminal states (in matrix notation):
s_start = [ 4, 1 ];
s_end   = [ 4, 8 ];

% number of episodes requested; alternative settings are left commented out:
%MAX_N_EPISODES=20;
%MAX_N_EPISODES=1e4;
%MAX_N_EPISODES=1e5;
%MAX_N_EPISODES=1e6;
MAX_N_EPISODES=10e6;

[Q,ets] = wgw_w_kings_n_wind(alpha,sideII,sideJJ,s_start,s_end,wind,MAX_N_EPISODES);

% For each state take the largest action value and the action that attains it.
% Row k of Q is the state with column-major linear index k in the
% sideII-by-sideJJ grid, so a plain reshape puts the results back on the grid.
nStates = sideII*sideJJ;
[bestVal,bestAct] = max( Q(1:nStates,:), [], 2 );
V      = reshape( bestVal, sideII, sideJJ );
pol_pi = reshape( bestAct, sideII, sideJJ );

% greedy policy figure:
plot_gw_policy(pol_pi,s_start,s_end,wind);
title( 'policy (1=>up,2=>down,3=>right,4=>left,5=>NW,6=>NE,7=>SE,8=>SW)' );
fn = sprintf('wgw_w_kings_n_wind_policy_nE_%d',MAX_N_EPISODES);
saveas( gcf, fn, 'png' );

% state value figure:
figure;
imagesc( V );
colorbar;
title( 'state value function' );
fn = sprintf('wgw_w_kings_n_wind_state_value_fn_nE_%d',MAX_N_EPISODES);
saveas( gcf, fn, 'png' );

% second output of the solver, plotted against its index; a copy is kept
% under a script-specific name in the workspace:
figure;
plot( 1:length(ets), ets, '-x' );
wgw_w_kings_n_wind_ets = ets;
%fn = sprintf('wgw_w_kings_n_wind_learning_rate_nE_%d',MAX_N_EPISODES); saveas( gcf, fn, 'fig' );