function [ nClusts, means, vars, nSamplesPerClust ] = prob_8_3() % PROB_8_3 - % % Written by: % -- % John L. Weatherwax 2005-08-14 % % email: wax@alum.mit.edu % % Please send comments and especially bug reports to the % above email address. % %----- data = [ 1 5; 2 5; 3 4; 6 4; 7 4; 8 5; ]; fh=figure; plot( data(:,1), data(:,2), 'ok' ); axis( [ 0 9 3 6 ] ); grid on; % Compute theta and tau following Mucciardi and Gose: % % 1) Normalize samples by marginal standard deviation: stdData = std( data ); dataN = data ./ repmat( stdData, size(data,1), 1 ); figure; plot( dataN(:,1), dataN(:,2), 'ok' ); grid on; % 2) Compute the average of each samples point to its L1 nearest neighbor: L1 = 4; tau = mDSTLNN( dataN, L1 ); L2 = 2; tauTheta = mDSTLNN( dataN, L2 ); theta = tauTheta/tau; Niters = 20; %var_floor = 0.1*(min(stdData)^2); %var_floor = 0.1*(max(stdData)^2); var_floor = 0.1*0.5*(min(stdData)^2 + max(stdData)^2); MAX_NUMBER_OF_DIFFICULT_SAMPLES = 3; % % Code begins: % -- nClusts = 0; nSamplesPerClust = []; means = []; vars = []; % Initialized data structures (clusters) with the first sample: % yt = data(1,:); nClusts = 1; nSamplesPerClust = [ 1 ]; means(nClusts,:) = yt; vars = var_floor*ones(nClusts,length(yt)); difficultSamples = []; for di=2:Niters, si = mod( di, size(data,1) ) + 1; yt = data(si,:); % Find the CLOSEST cluster to this sample yt: % [nearest] = findNearestCluster(yt,means,vars); % Deterimine how this point falls relative to the "nearest" cluster: % ci=nearest; diff = (means(ci,:) - yt) ./ sqrt(vars(ci,:)); dSqr = diff * (diff.'); if( dSqr <= tauTheta ) disp( 'dSqr <= tauTheta' ); % yt is in the interiour of cluster "nearest" (update this cluster): nSamplesPerClust(ci) = nSamplesPerClust(ci)+1; means(ci,:) = ( di*means(ci,:) + yt )/( di+1 ); vars(ci,:) = ( di*vars(ci,:) + ( yt - means(ci,:) ).^2 )/( di+1 ); vars(ci,vars(ci,:) MAX_NUMBER_OF_DIFFICULT_SAMPLES ) % Force classification of each sample: for ci=1:size(difficultSamples,1), yt = difficultSamples(ci,:); ci = findNearestCluster( yt, means, vars ); nSamplesPerClust(ci) = nSamplesPerClust(ci)+1; means(ci,:) = ( di*means(ci,:) + yt )/( di+1 ); vars(ci,:) = ( di*vars(ci,:) + ( yt - means(ci,:) ).^2 )/( di+1 ); vars(ci,vars(ci,:)