%
% example epage 104
% problem epage 128
%
% Driver script: hierarchical clustering plus a gap-statistic plot for two
% synthetic 2-D data sets (uniform noise, then two Gaussian clusters).
%
% Written by:
% --
% John L. Weatherwax                2005-08-14
%
% email: wax@alum.mit.edu
%
% Please send comments and especially bug reports to the
% above email address.
%
%-----

% Start from a clean slate: no figures, empty console, empty workspace.
close all;
drawnow;
clc;
clear;

% Helper code (gap_pca / gap_uniform are expected to live on these paths).
addpath( '../../Code/eda_data' );
addpath( '../Chapter1' );

% Settings shared by both experiments below
% -- modified from Example 5.7
K            = 10;            % largest number of clusters examined (x-axis is 1:K)
B            = 100;           % passed to the gap routines; presumably the number of reference data sets -- confirm
link_method  = 'average';     % average linkage ... considered by many to be quite good
pdist_method = 'euclidean';

%----------------------------------------------------------------------
% Experiment 1: 2-D uniform random data (no cluster structure built in)
%----------------------------------------------------------------------
X    = rand( 500, 2 );
data = X;

figure;
plot( X(:,1), X(:,2), 'x' );
title( 'the original data' );
%saveas( gcf, '../../WriteUp/Graphics/Chapter5/prob_5_9_orig_data', 'epsc' );

% Agglomerative clustering on the pairwise distances, shown as a dendrogram.
y = pdist( data, pdist_method );
z = linkage( y, link_method );

figure;
dendrogram( z );
title( 'average linkage' );
%saveas( gcf, '../../WriteUp/Graphics/Chapter5/prob_5_9_average', 'epsc' );

% Consider the gap-PCA statistics
% NOTE(review): this experiment calls gap_pca while the next one calls
% gap_uniform -- confirm that using different routines is intended.
[Z, khat, gap, Wobs, muWb] = gap_pca( X, K, B, link_method, pdist_method );
fprintf('khat = %10d\n',khat);

% Observed vs. expected curves over k = 1..K.
figure;
plot( 1:K, Wobs, 'o-', 1:K, muWb, 'x-' );
legend({'Observed';'Expected'});
xlabel('Number of Clusters k');
ylabel('Observed and Expected log(W_k)');
saveas( gcf, '../../WriteUp/Graphics/Chapter5/prob_5_9_oe_unif', 'epsc' );

% The gap curve itself.
figure;
plot( 1:K, gap, 'o-' );
title('Gap');
xlabel('Number of Clusters k');
ylabel('Gap');
saveas( gcf, '../../WriteUp/Graphics/Chapter5/prob_5_9_gap_unif', 'epsc' );

%----------------------------------------------------------------------
% Experiment 2: bivariate data with two well separated clusters
% (100 points around (-3,-3) and 100 around (+3,+3), identity covariance)
%----------------------------------------------------------------------
x1   = mvnrnd( [-3, -3], [ 1, 0; 0, 1 ], 100 );
x2   = mvnrnd( [+3, +3], [ 1, 0; 0, 1 ], 100 );
X    = [ x1; x2 ];
data = X;

figure;
plot( X(:,1), X(:,2), 'x' );
title( 'the original data' );
%saveas( gcf, '../../WriteUp/Graphics/Chapter5/prob_5_9_orig_data', 'epsc' );

% Same clustering recipe as in experiment 1.
y = pdist( data, pdist_method );
z = linkage( y, link_method );

figure;
dendrogram( z );
title( 'average linkage' );
%saveas( gcf, '../../WriteUp/Graphics/Chapter5/prob_5_9_average', 'epsc' );

% Consider the uniform-gap statistics
[Z, khat, gap, Wobs, muWb] = gap_uniform( X, K, B, link_method, pdist_method );
fprintf('khat = %10d\n',khat);

% Observed vs. expected curves over k = 1..K.
figure;
plot( 1:K, Wobs, 'o-', 1:K, muWb, 'x-' );
legend({'Observed';'Expected'});
xlabel('Number of Clusters k');
ylabel('Observed and Expected log(W_k)');
saveas( gcf, '../../WriteUp/Graphics/Chapter5/prob_5_9_oe_clust', 'epsc' );

% The gap curve itself.
figure;
plot( 1:K, gap, 'o-' );
title('Gap');
xlabel('Number of Clusters k');
ylabel('Gap');
saveas( gcf, '../../WriteUp/Graphics/Chapter5/prob_5_9_gap_clust', 'epsc' );