%
% An extension of Example 6.6: This one shows how to do agglomerative
% model-based clustering on the Oronsay data, and then evaluates the
% resulting partition with the gap statistic, a silhouette plot, and
% the (adjusted) Rand index against a known labeling.
%
% Written by:
% --
% John L. Weatherwax                2005-08-14
%
% email: wax@alum.mit.edu
%
% Please send comments and especially bug reports to the
% above email address.
%
%-----

close all; drawnow; clc; clear;

addpath( '../../Code/eda_data' );
addpath( '../../Code/eda_toolbox' );
addpath( '../Chapter1' );

% Load the Oronsay data set.
% NOTE(review): the result of the first call is immediately overwritten by
% the second; both calls are kept as in the original because the side
% effects of load_oronsay (if any) are not visible from this script.
%[X,midden,beachdune] = load_oronsay(0,0,0,0);
[X,midden,beachdune] = load_oronsay(1,0,1,0);
[X,midden,beachdune] = load_oronsay(1,0,1,1);
[n,p] = size(X);

% The "true" labeling that the clustering is compared against.
truth_labels = midden;
truth_type   = 'midden';

fprintf(['the proportions of ', truth_type ,' labeling in each class\n']);
tabulate( truth_labels )

% Do the agglomerative model-based clustering which is included in the
% EDA Toolbox and the MBC Toolbox.
Z = agmbclust(X);

% Construct a dendrogram.
figure;
dendrogram(Z);
title('Results for Oronsay Data - Agglomerative MBC')

% Consider the uniform-gap statistic to determine the number of clusters
% -- modified from Example 5.7
%
K = 6;    % largest number of clusters considered
B = 50;   % value passed as the third argument of gap_uniform
% The first output of gap_uniform is stored under its own name so that it
% does NOT overwrite the agglomerative MBC result Z, which is the one the
% partition below is meant to be cut from.
[Zgap, khat, gap, Wobs, muWb] = gap_uniform(X,K,B);
fprintf('khat = %10d\n',khat);

% from the gap-statistic plot we find a prediction of four clusters
figure;
plot(1:K,Wobs,'o-',1:K,muWb,'x-')
legend({'Observed';'Expected'})
xlabel('Number of Clusters k')
ylabel('Observed and Expected log(W_k)')
saveas( gcf, '../../WriteUp/Graphics/Chapter6/prob_6_1_wobs', 'epsc' );

figure;
plot(1:K,gap,'o-');
xlabel('Number of Clusters k');
ylabel('Gap')
saveas( gcf, '../../WriteUp/Graphics/Chapter6/prob_6_1_gap', 'epsc' );

% We can apply the silhouette procedure for this result after we find a
% partition of the agglomerative MBC hierarchy.
% Use khat=2 groups (the khat=4 alternative is left commented out).
cind = cluster(Z,'maxclust',2);
%cind = cluster(Z,'maxclust',4);

figure;
[S,H] = silhouette(X,cind);
fprintf('mean silhouette value = %10.6f\n', mean(S) );
title('Silhouette Plot - Agglomerative MBC')
saveas( gcf, '../../WriteUp/Graphics/Chapter6/prob_6_1_silhouette', 'epsc' );

% Compare the cluster assignment with the true labels (results are
% displayed because the statements are not terminated with a semicolon).
adjrand( cind, truth_labels )
randind( cind, truth_labels )