%
% example epage 104
% problem epage 128
%
% Purpose: draw 2-D uniform random data (no real cluster structure), build
% an agglomerative-clustering dendrogram for it, and then look at the
% uniform-reference gap statistic to see how many clusters it suggests.
% Every figure is also written to disk as a color EPS file.
%
% Written by:
% --
% John L. Weatherwax                2005-08-14
%
% email: wax@alum.mit.edu
%
% Please send comments and especially bug reports to the
% above email address.
%
%-----

close all; drawnow; clc; clear;

% Project code this script depends on (gap_uniform is not defined here;
% presumably it lives on one of these paths -- confirm).
addpath( '../../Code/eda_data' );
addpath( '../Chapter1' );

% ---- settings shared by every step below --------------------------------
graphics_dir        = '../../WriteUp/Graphics/Chapter5/'; % must already exist; saveas will not create it
link_method         = 'average';    % linkage used for the dendrogram AND the gap statistic
pdist_method        = 'euclidean';  % distance used for the dendrogram AND the gap statistic
show_single_linkage = false;        % set true to also plot/save the single-linkage dendrogram
K                   = 10;           % the gap statistic is plotted for k = 1..K
B                   = 100;          % passed to gap_uniform (presumably the number of reference data sets -- confirm)

% ---- get some 2-D uniform random data -----------------------------------
% NOTE: the random generator is not seeded, so every run draws new data.
X = rand( 500, 2 );

figure;
plot( X(:,1), X(:,2), 'x' );
title( 'the original data' );
saveas( gcf, [ graphics_dir, 'prob_5_7_orig_data' ], 'epsc' );

% ---- hierarchical clustering --------------------------------------------
% Pairwise distances are computed once and reused by every linkage below.
y = pdist( X, pdist_method );

if( show_single_linkage )
  % linkage's default is single linkage:
  %   d(S,R) = min_{i \in S, j \in R} d(x_i,x_j)
  z = linkage( y );
  figure;
  dendrogram( z );
  title( 'single linkage' );
  saveas( gcf, [ graphics_dir, 'prob_5_7_sl' ], 'epsc' );
end

% try the average distance ... considered by many to be quite good:
z = linkage( y, link_method );
figure;
dendrogram( z );
title( [ link_method, ' linkage' ] );
% The file name is fixed; it only matches the title while link_method is 'average'.
saveas( gcf, [ graphics_dir, 'prob_5_7_average' ], 'epsc' );

% ---- the uniform-gap statistic ------------------------------------------
% -- modified from Example 5.7
%
% gap_uniform is a project helper; only khat, gap, Wobs and muWb are used
% here (Z is returned but not needed by this script).
[Z, khat, gap, Wobs, muWb] = gap_uniform( X, K, B, link_method, pdist_method );
fprintf('khat = %10d\n',khat);

% Observed vs. expected curves as a function of the number of clusters.
figure;
plot( 1:K, Wobs, 'o-', 1:K, muWb, 'x-' );
legend( {'Observed';'Expected'} );
xlabel( 'Number of Clusters k' );
ylabel( 'Observed and Expected log(W_k)' );
saveas( gcf, [ graphics_dir, 'prob_5_7_oe' ], 'epsc' );

% The gap curve itself.
figure;
plot( 1:K, gap, 'o-' );
title( 'Gap' );
xlabel( 'Number of Clusters k' );
ylabel( 'Gap' );
saveas( gcf, [ graphics_dir, 'prob_5_7_gap' ], 'epsc' );