% R_LEARN_ACQ_SCRIPT - R learning for the access control que problem % % Written by: % -- % John L. Weatherwax 2007-12-03 % % email: wax@alum.mit.edu % % Please send comments and especially bug reports to the % above email address. % %----- clear all; close all; clc; addpath( genpath( '../../../FullBNT-1.0.4/KPMstats/' ) ); n_servers = 10; % the number of servers h = 0.5; % probability of getting a high paying customer p = 0.06; % probability a machine in service finished during this timestep %alpha = 0.1; % learning rate for Q alpha = 0.01; % learning rate for Q beta = 0.01; % learning rate for rho %MAX_N_EPISODES = 1000; %MAX_N_EPISODES = 1e4; %MAX_N_EPISODES = 1e5; MAX_N_EPISODES = 2e6; MAX_N_EPISODES = 10e6; [rho,Q,Qmax,Act] = R_learn_acq(alpha,beta,h,p,n_servers,MAX_N_EPISODES); fprintf('rho = %f\n',rho); fprintf('max optimal action function Qmax is given by ...\n'); fprintf('priority increases downward; number of free servers increase going right; \n'); fprintf('[1,2,3,4] x [1,2,3,4,5,6,7,8,9,,10]\n'); disp(Qmax); fprintf('optimal (greedy) action is given by ...\n'); disp(Act); figure; imagesc( 1:n_servers, [1, 2, 4, 8], Qmax ); colorbar; xlabel( 'number of free servers' ); ylabel( 'cust. priority' ); figure; imagesc( 1:n_servers, [1, 2, 4, 8], Act ); colorbar; xlabel( 'number of free servers' ); ylabel( 'cust. priority' ); saveas( gcf, 'optimal_greedy_policy', 'png' ); figure; hold on; grid on; ph1=plot( 1:10, Qmax(1,:), '-b' ); ph2=plot( 1:10, Qmax(2,:), '-r' ); ph3=plot( 1:10, Qmax(3,:), '-g' ); ph4=plot( 1:10, Qmax(4,:), '-m' ); legend( [ ph1, ph2, ph3, ph4 ], { 'priority 1', 'priority 2', 'priority 4', 'priority 8' } ); saveas( gcf, 'value_of_best_action', 'png' );