#
# Written by:
# --
# John L. Weatherwax 2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
# EPage 787
#
#-----

set.seed(0)
n_samples <- 100

# The five candy-bag hypotheses.  Each row is a hypothesis; the two columns
# are c(P(cherry), P(lime)), with draws coded as 1 = cherry and 2 = lime.
cl_probs <- matrix(
  c(1.00, 0.00,
    0.75, 0.25,
    0.50, 0.50,
    0.25, 0.75,
    0.00, 1.00),
  nrow = 5, ncol = 2, byrow = TRUE
)

# Draw a sample history of candies from each of the five bag types.
# Drawn in bag order so the RNG stream matches drawing them one by one.
bag_samples <- lapply(seq_len(nrow(cl_probs)), function(bi) {
  sample(c(1, 2), n_samples, replace = TRUE, prob = cl_probs[bi, ])
})

bayesian_learn <- function(data,
                           model_probs = cl_probs,
                           model_priors = c(0.1, 0.2, 0.4, 0.2, 0.1)) {
  # Perform Bayesian learning across time from a sequence of candy draws.
  #
  # Args:
  #   data:         integer vector of observed draws (1 = cherry, 2 = lime).
  #   model_probs:  matrix, one row per hypothesis, columns
  #                 c(P(cherry), P(lime)) under that hypothesis.
  #   model_priors: prior probability of each hypothesis; must have one
  #                 entry per row of model_probs.
  #
  # Returns a list with:
  #   P_h_given_data:          posterior P(h_i | d_1..d_j); column 1 is the
  #                            prior (no data yet), column j+1 is after draw j.
  #   P_data_given_h:          running likelihood P(d_1..d_j | h_i).
  #   P_next_is_lime_bayesian: full Bayesian prediction P(next = lime | data).
  #   P_next_is_lime_map:      prediction using only the MAP hypothesis.
  #   P_next_is_lime_ml:       prediction using only the ML hypothesis.
  n_samples <- length(data)
  n_models <- nrow(model_probs)
  stopifnot(n_models == length(model_priors))

  # Per-sample likelihoods P(d_j | h_i): column j is draw j (not yet the
  # running product).
  model_likelihoods <- model_probs[, data]

  # Running likelihood P(d_1..d_j | h_i).  A leading column of ones stands
  # for "no data observed" so that column 1 of the posterior is the prior.
  P_data_given_h <- cbind(rep(1, n_models), model_likelihoods)
  colnames(P_data_given_h) <- NULL
  P_data_given_h <- t(apply(P_data_given_h, 1, cumprod))

  # Posterior: P(h_i | data) proportional to P(data | h_i) * P(h_i).
  # Multiplying a matrix by a vector recycles down each column, i.e. row i
  # is scaled by model_priors[i]; then normalize each column to sum to 1.
  P_h_given_data <- P_data_given_h * model_priors
  P_h_given_data <- sweep(P_h_given_data, 2, colSums(P_h_given_data), "/")

  # Bayesian prediction: P(next = lime | data) = sum_i P(lime | h_i) P(h_i | data).
  P_d_eq_lime_given_H <- matrix(model_probs[, 2], nrow = n_models,
                                ncol = n_samples + 1, byrow = FALSE)
  P_next_is_lime_bayesian <- colSums(P_d_eq_lime_given_H * P_h_given_data)

  # MAP prediction: commit to the single most probable hypothesis.
  # (Uses model_probs, not the global cl_probs, so the function is correct
  # when called with a different hypothesis matrix.)
  h_map <- apply(P_h_given_data, 2, which.max)
  P_next_is_lime_map <- model_probs[h_map, 2]

  # ML prediction: commit to the maximum-likelihood hypothesis.
  h_ml <- apply(P_data_given_h, 2, which.max)
  P_next_is_lime_ml <- model_probs[h_ml, 2]

  list(
    P_h_given_data = P_h_given_data,
    P_data_given_h = P_data_given_h,
    P_next_is_lime_bayesian = P_next_is_lime_bayesian,
    P_next_is_lime_map = P_next_is_lime_map,
    P_next_is_lime_ml = P_next_is_lime_ml
  )
}

plot_bag_results <- function(res, bag_index, show_legends = FALSE) {
  # Plot the result list from bayesian_learn() for one bag:
  # left panel is the posterior over hypotheses after each draw, right
  # panel is the Bayesian / MAP / ML predictions that the next candy is
  # lime.  Legends are optional (the original figures only drew them for
  # bag 1).
  par(mfrow = c(1, 2))
  matplot(t(res$P_h_given_data), type = "l", lty = 1,
          xlab = "index of candy drawn", ylab = "prob(h|data)",
          xlim = c(1, n_samples + 1), ylim = c(0, 1),
          main = sprintf("data from bag %d", bag_index))
  if (show_legends) {
    legend(40, 0.8, c("bag 1", "bag 2", "bag 3", "bag 4", "bag 5"),
           lty = 1, col = 1:6)
  }
  grid()
  plot(res$P_next_is_lime_bayesian, type = "l", lty = 1,
       xlab = "index of candy drawn", ylab = "prob(next is lime|data)",
       xlim = c(1, n_samples + 1), ylim = c(0, 1),
       main = "lime prediction")
  lines(res$P_next_is_lime_map, type = "l", lty = 1, col = 2)
  lines(res$P_next_is_lime_ml, type = "l", lty = 1, col = 3)
  if (show_legends) {
    legend(40, 0.8, c("Bayes", "MAP", "ML"), lty = 1, col = 1:3)
  }
  grid()
  par(mfrow = c(1, 1))
}

save_plots <- TRUE

# Run the learning on the sample history from each bag and plot the
# posterior and the predicted lime probability, optionally saving each
# figure as an EPS file.
for (bi in seq_along(bag_samples)) {
  res <- bayesian_learn(bag_samples[[bi]])
  if (save_plots) {
    postscript(sprintf("../../WriteUp/Graphics/Chapter20/bl_bag_%d.eps", bi),
               onefile = FALSE, horizontal = FALSE)
  }
  plot_bag_results(res, bi, show_legends = (bi == 1))
  if (save_plots) {
    dev.off()
  }
}