%
% Written by:
% --
% John L. Weatherwax                2005-08-14
%
% email: wax@alum.mit.edu
%
% Please send comments and especially bug reports to the
% above email address.
%
% Purpose: agglomerative clustering of the sparrow data set.  Draws an
% average-linkage dendrogram, evaluates the uniform gap statistic for
% 1..K clusters, and plots a three-cluster partition of the data.
% Figures are written as color EPS files under
% ../../WriteUp/Graphics/Chapter5/.
%
%-----

% Start from a clean session: no figures, empty console, empty workspace.
close all;
drawnow;
clc;
clear;

% Project helpers used below (load_sparrow, gap_uniform, plot_labeled)
% are expected to live on these paths.
addpath( '../../Code/eda_data' );
addpath( '../../Code/eda_toolbox' );
addpath( '../Chapter1' );

% Load the data matrix.
% NOTE(review): the meaning of the four zero flags is defined by
% load_sparrow and is not visible here -- confirm against that helper.
X = load_sparrow(0,0,0,0);
[n,p] = size(X);

% Pairwise Euclidean distances between the rows of X.
y = pdist(X,'euclidean');

% Disabled: single-linkage dendrogram.
if false
  z = linkage(y,'single');
  figure;
  dendrogram(z);
  title( 'sparrow dendrogram with single linkage' );
  saveas( gcf, '../../WriteUp/Graphics/Chapter5/prob_5_2_sparrow_sl', 'epsc' );
end

% Average-linkage dendrogram.
z = linkage(y,'average');
figure;
dendrogram(z);
title( 'sparrow dendrogram with average linkage' );
saveas( gcf, '../../WriteUp/Graphics/Chapter5/prob_5_2_sparrow_av', 'epsc' );

% Disabled: implementation of the inconsistency metric (the original
% author was not sure that this usage is correct).
if false
  y_ic = inconsistent(z);
  % The last column contains the inconsistency coefficient.
  ic = y_ic(:,end);
  figure;
  plot( ic, 'o' );
  % From this plot, 0.6 looks like a good threshold.
  t = 0.6;
  tmp = cluster(z,'cutoff',t);
end

% Uniform gap statistic -- modified from Example 5.7.
% NOTE(review): K is presumably the largest number of clusters examined
% and B the number of reference data sets drawn -- confirm in gap_uniform.
K = 5;
B = 50;
link_method = 'complete';
%link_method = 'average'; % <- gives only one cluster ...
pdist_method = 'euclidean';
[Z, khat, gap, Wobs, muWb] = gap_uniform(X,K,B,link_method,pdist_method);
fprintf('khat = %10d\n',khat);

% Horizontal axis shared by both gap-statistic plots.
k_axis = 1:K;

% Observed versus expected curves (this figure is displayed, not saved).
figure;
plot(k_axis,Wobs,'o-',k_axis,muWb,'x-')
legend({'Observed';'Expected'})
xlabel('Number of Clusters k')
ylabel('Observed and Expected log(W_k)')

% Gap curve.
figure
plot(k_axis,gap,'o-')
title('Gap')
xlabel('Number of Clusters k')
ylabel('Gap')
saveas( gcf, '../../WriteUp/Graphics/Chapter5/prob_5_2_sparrow_gap', 'epsc' );

% Visualize a partition of the data.  Even though two clusters are
% predicted, the estimate is overridden with three so that the
% hypothetical outliers can be observed.
khat = 3;
cinds = cluster(Z,'maxclust',khat);
% NOTE(review): saveas below captures whatever figure is current after
% plot_labeled returns; whether plot_labeled opens a new figure is not
% visible here -- confirm.
plot_labeled(X(:,1),X(:,2),cinds);
saveas( gcf, '../../WriteUp/Graphics/Chapter5/prob_5_2_sparrow_clusters', 'epsc' );