%
% example epage 104
% problem epage 128
%
% Clusters a synthetic two-cluster bivariate data set with agglomerative
% (hierarchical) clustering and then calls gap_uniform to obtain an
% estimate (khat) of the number of clusters.  Every figure produced is
% saved as color EPS under ../../WriteUp/Graphics/Chapter5/.
%
% Requires mvnrnd, pdist, linkage and dendrogram, plus the project routine
% gap_uniform (expected to be reachable through the addpath calls below).
%
% Written by:
% --
% John L. Weatherwax                2005-08-14
%
% email: wax@alum.mit.edu
%
% Please send comments and especially bug reports to the
% above email address.
%
%-----

close all; drawnow; clc; clear;

addpath( '../../Code/eda_data' );
addpath( '../Chapter1' );

% All figures are written below this directory (it must already exist):
gfx_dir = '../../WriteUp/Graphics/Chapter5/';

% Set to true to also produce the single linkage dendrogram (off by default):
run_single_linkage = false;

% get some bivariate data with two well separated clusters
% (100 draws around (-3,-3) and 100 draws around (+3,+3), identity covariance).
% NOTE: no random seed is set, so the sample differs from run to run.
%
x1 = mvnrnd( 3*[-1,-1], [ 1, 0; 0, 1 ], 100 );
x2 = mvnrnd( 3*[+1,+1], [ 1, 0; 0, 1 ], 100 );
X  = [ x1; x2 ];
data = X;

figure;
plot( X(:,1), X(:,2), 'x' );
title( 'the original data' );
saveas( gcf, [ gfx_dir, 'prob_5_8_orig_data' ], 'epsc' );

% pairwise Euclidean distances; computed once and shared by every linkage below
y = pdist( data, 'euclidean' );

if( run_single_linkage )
  % use the default of single linkage:
  %   d(S,R) = min_{i \in S, j \in R} d(x_i,x_j)
  z = linkage( y );
  figure;
  dendrogram( z );
  title( 'single linkage' );
  saveas( gcf, [ gfx_dir, 'prob_5_8_sl' ], 'epsc' );
end

% try the average distance ... considered by many to be quite good:
%
z = linkage( y, 'average' );
figure;
dendrogram( z );
title( 'average linkage' );
saveas( gcf, [ gfx_dir, 'prob_5_8_average' ], 'epsc' );

% Consider the uniform-gap statistics
% -- modified from Example 5.7
%
K = 10;    % results are plotted against k = 1:K below
B = 100;   % presumably the number of uniform reference data sets -- confirm in gap_uniform
link_method  = 'average';
pdist_method = 'euclidean';

[Z, khat, gap, Wobs, muWb] = gap_uniform( X, K, B, link_method, pdist_method );

fprintf('khat = %10d\n',khat);

% observed versus expected log(W_k)
figure;
plot( 1:K, Wobs, 'o-', 1:K, muWb, 'x-' );
legend( {'Observed';'Expected'} );
xlabel( 'Number of Clusters k' );
ylabel( 'Observed and Expected log(W_k)' );
saveas( gcf, [ gfx_dir, 'prob_5_8_oe' ], 'epsc' );

% the gap curve returned by gap_uniform, plotted against k
figure;
plot( 1:K, gap, 'o-' );
title( 'Gap' );
xlabel( 'Number of Clusters k' );
ylabel( 'Gap' );
saveas( gcf, [ gfx_dir, 'prob_5_8_gap' ], 'epsc' );