% % Written by: % -- % John L. Weatherwax 2005-08-14 % % email: wax@alum.mit.edu % % Please send comments and especially bug reports to the % above email address. % %----- addpath( '../../Code/eda_data' ); addpath( '../../Code/eda_toolbox' ); close all; drawnow; clc; clear; rand('seed',0); randn('seed',0); load skulls; data = skullsdata; % [n=number of samples=40; p=number of features=12] [n,p] = size(data); % apply some preprocessing on our data ... % the skulls data seems to almost be in "zscore" form ... % % the mean of each feature is almost zero and the standard deviation is almost one ... % % so I'm not sure that this will help or hurt % if( 1 ) % this data comes from a natural process ... might be gaussian ... apply the zscore transformation f_mu = mean( data ); f_sd = std( data ); data_t = ( data - repmat( f_mu, [n,1] ) ) ./ repmat( f_sd, [n,1] ); data = data_t; end %-- % 1) apply non-metric multidimensional scaling on the skulls data: % %-- D = pdist(data,'euclidean'); % <- the dissimilarities ... mdsOut = nmmds(D,10,1); % <- project into 10 dimensions and use the Minkowski metric (with R=1 = city block distance) figure; hold on; ind_f = 1:18; % <- female indices ind_m = 19:40; % <- male indices h_f=plot(mdsOut(ind_f,1),mdsOut(ind_f,2),'rd','MarkerSize',8,'MarkerFaceColor','r'); hold on; h_m=plot(mdsOut(ind_m,1),mdsOut(ind_m,2),'gs','MarkerSize',8,'MarkerFaceColor','g'); grid on; legend( [h_f,h_m], {'female', 'male'}, 'location', 'northwest' ); xlabel('MDS_1'); ylabel('MDS_2'); title(''); saveas( gcf, '../../WriteUp/Graphics/Chapter3/prob_3_2_skulls_NMMDS_proj', 'epsc' ); %-- % 2) Now compute the SMACOEF procedure like Example~3.2 on this data: % which illustrates the SMACOF algorithm for metric MDS. % y = data; D = squareform(pdist(y,'seuclidean')); [n,p] = size(D); % Turn off this warning... warning off MATLAB:divideByZero % Get the first term of stress. % This is fixed - does not depend on the configuration. stress1 = sum(sum(D.^2))/2; % Now find an initial configuration - randomly generate. % First need to compute the stress for this 2-D configuration. d = 2; Z = unifrnd(-2,2,n,d); % Find the stress for this. DZ = squareform(pdist(Z)); stress2 = sum(sum(DZ.^2))/2; stress3 = sum(sum(D.*DZ)); oldstress = stress1 + stress2 - stress3; % Iterate until stress converges. tol = 10^(-6); dstress = realmax; numiter = 1; dstress = oldstress; while dstress > tol & numiter <= 100000 numiter = numiter + 1; % Now get the update BZ = -D./DZ; for i = 1:n BZ(i,i) = 0; BZ(i,i) = -sum(BZ(:,i)); end X = n^(-1)*BZ*Z; Z = X; % Now get the distances % Find the stress DZ = squareform(pdist(Z)); stress2 = sum(sum(DZ.^2))/2; stress3 = sum(sum(D.*DZ)); newstress = stress1 + stress2 - stress3; dstress = oldstress - newstress; oldstress = newstress; end % lets plot the first two projections ... % figure; ind_f = 1:18; % <- female indices ind_m = 19:40; % <- male indices mdsOut = X; h_f=plot(mdsOut(ind_f,1),mdsOut(ind_f,2),'rd','MarkerSize',8,'MarkerFaceColor','r'); hold on; h_m=plot(mdsOut(ind_m,1),mdsOut(ind_m,2),'gs','MarkerSize',8,'MarkerFaceColor','g'); grid on; legend([h_f,h_m],{'Female';'Male'}, 'location', 'northeast' ); xlabel('MDS_1'); ylabel('MDS_2'); title(''); grid on; saveas( gcf, '../../WriteUp/Graphics/Chapter3/prob_3_2_skulls_SMACOF_proj', 'epsc' );