%
% example epage ???104
% problem epage 129
%
% Apply k-means (and, for comparison, agglomerative) clustering to the
% oronsay data set.
%
% Written by:
% --
% John L. Weatherwax                2005-08-14
%
% email: wax@alum.mit.edu
%
% Please send comments and especially bug reports to the
% above email address.
%
%-----

% Start from a clean slate: close figures, flush graphics, clear the
% command window and the workspace.
close all;
drawnow;
clc;
clear;

% These paths are relative, so the script must be run from its own directory.
addpath( '../../Code/eda_data' );
addpath( '../../Code/eda_toolbox' );
addpath( '../Chapter1' );

%[X,midden,beachdune] = load_oronsay(0,0,0,0);
% NOTE(review): the outputs of the first call below are overwritten by the
% second call. It is kept in case load_oronsay has side effects; confirm the
% meaning of the flag arguments and drop the first call if it is redundant.
[X,midden,beachdune] = load_oronsay(1,0,1,0);
[X,midden,beachdune] = load_oronsay(1,0,1,1);
[n,p] = size(X);

% Reference labeling that the derived clusterings are compared against.
truth_labels = midden;
truth_type   = 'midden';

% The label name is passed as a %s argument rather than concatenated into the
% format string, so special characters in it cannot corrupt the format.
fprintf('the proportions of %s labeling in each class\n', truth_type);
tabulate( truth_labels )

% Agglomerative clustering: pairwise Euclidean distances -> linkage tree.
y = pdist(X,'euclidean');
%link_method = 'complete';
link_method = 'average'; % <- gives only one cluster ...
z = linkage(y,link_method);

figure;
dendrogram(z);
title( ['oronsay dendrogram with ',link_method,' linkage'] );

% Cut the tree into at most khat clusters.
khat=3;
%fprintf('khat = %10d\n',khat);
c_agg_inds = cluster(z,'maxclust',khat);
fprintf('agglomerative clustering proportions:\n');
tabulate( c_agg_inds )

% A Modified - Example 5.6
% This illustrates the use of the silhouette statistic and plots.
% Get a k-means clustering using 3 clusters, and 5 replicates.
% We also ask MATLAB to display the final results for each replicate.
%
kmus3 = kmeans(X,3,'replicates',5,'display','final');
fprintf('k-means result proportions:\n');
tabulate( kmus3 );

% Get the silhouette plots for both clusterings and the values.
% Silhouette plots for both clusterings, saved as color EPS, followed by the
% Rand / adjusted Rand indices of each clustering against the truth labels.
% Reads X, c_agg_inds, kmus3 and truth_labels from the workspace.

% Make sure the output directory exists so saveas does not fail after the
% clustering work has already been done.
graphics_dir = '../../WriteUp/Graphics/Chapter5/';
if ~exist(graphics_dir, 'dir')
  mkdir(graphics_dir);
end

% --- agglomerative clustering ---
% The two-output form of silhouette is kept on purpose: it draws the plot and
% returns the figure handle, which is then used for saving instead of gcf
% (gcf can point at a different figure if focus changes while running).
figure;
[sil_agg, h_agg] = silhouette(X, c_agg_inds);
m_sil_agg = mean(sil_agg);
title( ['agglomerative clustering; mean(silhouette) value=',num2str(m_sil_agg)] );
saveas( h_agg, [graphics_dir, 'prob_5_14_oronsay_agglomerative_sil_plot'], 'epsc' );

% --- k-means clustering ---
figure;
[sil_kmeans, h_kmeans] = silhouette(X, kmus3);
m_sil_kmeans = mean(sil_kmeans);
title( ['k-means clustering; mean(silhouette) value=',num2str(m_sil_kmeans)] );
saveas( h_kmeans, [graphics_dir, 'prob_5_14_oronsay_kmeans_sil_plot'], 'epsc' );

% Agreement of each derived clustering with the truth labeling.
% randind / adjrand are project helpers expected on the MATLAB path.
fprintf('Now computing the Rand index of the derived clusters against truth\n');
fprintf('randind(agglomerative,truth) = %20.6f\n',randind(c_agg_inds,truth_labels));
fprintf('adjrand(agglomerative,truth) = %20.6f\n',adjrand(c_agg_inds,truth_labels));

fprintf('Now computing the Rand index of the derived clusters against truth\n');
fprintf('randind(k_means,truth) = %20.6f\n',randind(kmus3,truth_labels));
fprintf('adjrand(k_means,truth) = %20.6f\n',adjrand(kmus3,truth_labels));