%
% example epage ???104
% problem epage 129
%
% Written by:
% --
% John L. Weatherwax                2005-08-14
%
% email: wax@alum.mit.edu
%
% Please send comments and especially bug reports to the
% above email address.
%
%-----
%
% Purpose: hierarchically cluster the spam data set and draw two "elbow"
% style diagnostics for choosing the number of clusters:
%   1) the standardized fusion levels (graphical Mojena rule), and
%   2) the raw fusion levels.
% Both figures are written as color EPS files into the directory `dn`.
%
% Modified from Example 5.5: which shows the graphical Mojena rule.

close all; drawnow; clc; clear;

addpath( '../../Code/eda_data' );
addpath( '../Chapter1' );
addpath( '../../Code/eda_toolbox' );

% Raise the recursion limit for the calls below.
% NOTE(review): presumably needed by dendrogram on a tree this large -- confirm.
set(0,'recursionlimit',1000);

% Choose one preprocessing variant (the argument meanings are defined by
% load_spam, which is not visible here; the notes are the original author's):
%X = load_spam(1,0,1,0); % <- preprocess using the z-score ... only
%X = load_spam(0,0,0,1); % <- do PCA based dimensionality reduction on the direct data ...
%X = load_spam(1,0,1,1); % <- do PCA based dimensionality reduction ... with z-score ... produces chaining in the dendrogram
%X = load_spam(1,0,1,2); % <- do SVD based dimensionality reduction ... with z-score ... produces chaining in the dendrogram
X = load_spam(1,1,1,1); % <- do PCA based dimensionality reduction ... with z-score ... seems to work
%X = load_spam(1,1,1,2); % <- do SVD based dimensionality reduction (with column ordering) ... with z-score ...

[n,p] = size(X);

% Output directory for the figures (must end with a file separator because
% the file names are appended by plain concatenation below).
dn = '../../WriteUp/Graphics/Chapter5/';
%dn = '../Graphics/Chapter5/';

% Linkage method handed to linkage(); switch here to try another method.
linkage_type = 'average';
%linkage_type = 'complete';

% We will do this somewhat differently, so it is in keeping with the types of
% 'elbow' plots in previous chapters and applications.

% Get the distances and the linkage.
% The metric is the plain Euclidean distance; any standardization of the
% features has to come from the load_spam preprocessing selected above.
% (The pairwise distances are computed exactly once -- they are O(n^2) in
% both time and memory.)
Y = pdist(X,'euclidean');
Z = linkage(Y,linkage_type);

% Plot dendrogram with fewer leaf nodes.
dendrogram(Z,15);

% Number of candidate cluster counts to examine.  Z holds n-1 merges, so
% never ask for more fusion levels than exist.
nc = min(10, n-1);

% Flip the Z matrix - makes it easier: row i of Zf is then the i-th merge
% counted from the top of the tree, so Zf(1,3) is the largest fusion level.
Zf = flipud(Z);

% Now get the vectors of means and standard deviations of the fusion
% levels from the i-th one (counted from the top) down to the bottom.
abar = zeros(1,nc);
astd = zeros(1,nc);
for i = 1:nc
  abar(i) = mean(Zf(i:end,3));
  astd(i) = std(Zf(i:end,3));
end

% Get the y values for plotting: each fusion level standardized by the
% mean and standard deviation computed above.
yv = (Zf(1:nc,3) - abar(:))./astd(:);
xv = 1:nc;

figure;
plot(xv,yv,'-o')
xlabel('Number of Clusters')
ylabel('Standardized Fusion Levels')
saveas( gcf, [dn, 'prob_5_11_spam_mojena_1'], 'epsc' );

% We can also plot just the fusion levels
% and look for the elbow.
figure;
plot(1:nc,Zf(1:nc,3),'o-')
xlabel('Number of Clusters')
ylabel('Raw Fusion Levels')
saveas( gcf, [dn, 'prob_5_11_spam_mojena_2'], 'epsc' );