%
% Written by:
% --
% John L. Weatherwax                2005-08-14
%
% email: wax@alum.mit.edu
%
% Please send comments and especially bug reports to the
% above email address.
%
%-----
%
% Hierarchical clustering of the geyser data set: draws a dendrogram with
% average linkage, then uses the uniform gap statistic to estimate the
% number of clusters and plots the resulting cluster assignments.
%
% Figures are written (as 'epsc') under ../../WriteUp/Graphics/Chapter5/.
%

% Start from a clean session: this closes every figure and CLEARS the workspace.
close all; drawnow; clc; clear;

% Project-local helpers (load_geyser, gap_uniform) are expected on these paths.
addpath( '../../Code/eda_data' );
addpath( '../../Code/eda_toolbox' );

X = load_geyser(0,0);
%X = load_geyser(1,1); % the z-score makes no difference in the clusters ...

[n,p] = size(X);

% Pairwise Euclidean distances, shared by both linkage calls below.
y = pdist(X,'euclidean');

% Single-linkage dendrogram (disabled; flip the guard to 1 to enable).
if( 0 )
  z = linkage(y,'single');
  figure; dendrogram(z); title( 'geyser dendrogram with single linkage' );
  %saveas( gcf, '../../WriteUp/Graphics/Chapter5/prob_5_2_geyser_sl', 'epsc' );
end

% Average-linkage dendrogram.
z = linkage(y,'average');
figure; dendrogram(z); title( 'geyser dendrogram with average linkage' );
saveas( gcf, '../../WriteUp/Graphics/Chapter5/prob_5_2_geyser_av', 'epsc' );

% Implement the inconsistency metric ... not sure that this is correct ...
%
if( 0 )
  y_ic = inconsistent(z);
  % take the last column ... it contains the inconsistency coefficient:
  ic = y_ic(:,end);
  figure; plot( ic, 'o' );
  % from this plot it looks like 0.6 should be a good threshold ...
  t = 0.6;
  tmp = cluster(z,'cutoff',t);
end

% Consider the uniform-gap statistics
% -- modified from Example 5.7
%
K = 10;   % passed to gap_uniform; the plots below span 1:K clusters
B = 10;   % passed to gap_uniform (presumably the number of reference data sets -- confirm)
link_method = 'complete';
%link_method = 'average'; % <- gives only one cluster ...
pdist_method = 'euclidean';
[Z, khat, gap, Wobs, muWb] = gap_uniform(X,K,B,link_method,pdist_method);
fprintf('khat = %10d\n',khat);

% Observed versus expected log(W_k) as a function of the number of clusters.
figure;
plot(1:K,Wobs,'o-',1:K,muWb,'x-')
legend({'Observed';'Expected'})
xlabel('Number of Clusters k')
ylabel('Observed and Expected log(W_k)')

% The gap curve itself.
figure;
plot(1:K,gap,'o-');
xlabel('Number of Clusters k');
ylabel('Gap')
saveas( gcf, '../../WriteUp/Graphics/Chapter5/prob_5_2_geyser_gap', 'epsc' );

% Using the suggested khat clusters, let's visualize them:
%
cinds = cluster(Z,'maxclust',khat);

% Sample index for every element of X. X(ind) below uses linear indexing,
% so this plot assumes X is a vector of samples -- TODO confirm against load_geyser.
I = 1:numel(X);

% One marker style per cluster. The first two entries reproduce the original
% two-cluster figure; further entries are only used when khat > 2, and the
% list is cycled if there are more clusters than styles.
marker_styles = { 'rx', 'bo', 'g+', 'ks', 'md', 'c^', 'r*', 'bv', 'g<', 'k>' };
n_styles = numel(marker_styles);

figure;
for ci = 1:max(cinds)
  ind = find(cinds==ci);
  style = marker_styles{ mod(ci-1,n_styles)+1 };
  plot( I(ind), X(ind), style, 'MarkerSize', 10 );
  hold on; % issued after the first plot so the axes are set up exactly as before
end
xlabel('sample index'); ylabel('sample value')
saveas( gcf, '../../WriteUp/Graphics/Chapter5/prob_5_2_geyser_clusters', 'epsc' );