%
% Examples 9.15 and 9.17
%
% epage 392 (cluster/
%
% Written by:
% --
% John L. Weatherwax                2008-02-20
%
% email: wax@alum.mit.edu
%
% Please send comments and especially bug reports to the
% above email address.
%
% Purpose:
%   Loads the "household" data set (variables `men` and `women`), stacks
%   the two groups into a single standardized data matrix, and then
%     1) builds an agglomerative (hierarchical) clustering, plots its
%        dendrogram and prints the cophenetic correlation coefficient;
%     2) runs k-means with k=2 (via cskmeans from the CSTool directory)
%        and plots the recovered clusters on top of the true men/women
%        grouping, using the first two standardized columns.
%   Both figures are written to disk as color EPS files.
%
%-----

clear all;
close all;
clc;

addpath('../../Code/CSTool');

load household;

% lump everything into one data matrix (men first, then women):
nMen = size(men,1);   % rows 1..nMen of X are men, the remainder are women
X    = [men; women];
X    = zscore(X);     % standardize each column so no variable dominates the distances

%--
% Apply Hierarchical clustering (epage 382):
%--
if( 1 )
  ye = pdist(X,'euclidean');
  %ye = pdist(X,'cityblock');

  z = linkage(ye,'single');
  %z = linkage(ye,'complete');
  %z = linkage(ye,'average');
  %z = linkage(ye,'ward');

  % P=0 asks for every leaf to be drawn.  The default (P=30) collapses the
  % lower branches when there are more than 30 observations, in which case
  % the x-axis would no longer correspond to individual data points.
  [H,T] = dendrogram(z,0);
  xlabel('datum instance');
  title( 'agglomerative clustering with single linkage' );
  saveas( gcf, 'prob_9_2_hierarchical_linkage', 'epsc' );

  fprintf('the cophenet coefficient for this clustering is = %10.5f\n',cophenet(z,ye));
end

%--
% Apply k-means clustering
%--
if( 1 )
  k = 2; % <- assume we have two clusters
  [cid,nr,centers] = cskmeans(X,k);

  % plot the two clusters overlayed with truth:
  figure;
  tm = plot( X(1:nMen,1),     X(1:nMen,2),     'sk' ); hold on; % <- this is the truth
  tw = plot( X(nMen+1:end,1), X(nMen+1:end,2), 'dk' );

  % k-means cluster ids are arbitrary, so decide which id corresponds to
  % "men" by a majority vote over the rows known to be men.  (Using only
  % the first observation would swap both labels whenever that single
  % point happened to be mis-clustered.)
  men_cluster = mode( cid(1:nMen) );
  if( men_cluster==1 ) % <- so the other id must be what was called the womens cluster ...
    women_cluster = 2;
  else
    women_cluster = 1;
  end

  % logical masks selecting the members of each recovered cluster
  isMen   = ( cid==men_cluster );
  isWomen = ( cid==women_cluster );

  am = plot( X(isMen,1),   X(isMen,2),   'sr', 'markersize', 10 );
  aw = plot( X(isWomen,1), X(isWomen,2), 'dr', 'markersize', 10 );

  legend( [tm, tw, am, aw], { 'true men', 'true women', 'approx men', 'approx women' }, 'location', 'best' );
  saveas( gcf, 'prob_9_2_k_means', 'epsc' );
end