% % epage 325 % % Written by: % -- % John L. Weatherwax 2008-02-20 % % email: wax@alum.mit.edu % % Please send comments and especially bug reports to the % above email address. % %----- clear all; close all; clc; addpath('../../Code/CSTool'); load snowfall; x=snowfall; n = length(x); % plot the raw data: nbins = 1 + log2(n); % sturges' rule figure; hist(x,nbins); title( 'raw data histogram using Sturges rule' ); saveas( gcf, 'prob_8_14_hist', 'epsc' ); csh=figure; csfreqpoly(x,20); hold on; % try some different orderings of the data: %x = sort(x); %x = [median(x), setdiff(x,median(x))]; n=length(x); % put the median first ... %-- % 1) estimate the number of components using the adaptive density modeling: %-- N=1; % <- the number of clusters ... to be determined ... mus = [ x(1) ]; vrs = [ var(x) ]; % <- variances ps = [ 1 ]; t = 1; % <- threshold for a new cluster ... for ii=2:n, the_pt = x(ii); % compute the Mahalanobis distance to each cluster: dists = ((the_pt-mus).^2)./vrs; if( min(dists)> t ) % THIS POINT IS A {\EM NEW} CLUSTER? N=N+1; the_mu = the_pt; % <- the default initializations ... %the_p = 1/(n+1); the_p = 1/ii; the_vr = var(x(1:ii)); ps = ((ii-1)/ii)*ps; % <- adjust the old definitions ... % group everything ... mus = [mus,the_mu]; vrs = [vrs,the_vr]; ps = [ps,the_p]; else % this point is NOT a {\em new} cluster? t = normpdf(the_pt,mus,sqrt(vrs)); f = dot(t,ps)+1e-9; % <- the probability of this datum tau = t.*ps/f; % <- the posteriori probability of this datum in each cluster % recursively update all paramters: ps = ps + (1/ii)*(tau - ps) + 1e-9; mus = mus + tau.*(the_pt - mus)./(ii*ps); vrs = vrs + tau.*( (the_pt-mus).^2 - vrs )./(ii*ps); end %fprintf( 'sum(ps)=%10.5f (should equal ONE)\n', sum(ps) ); %pause; end % plot the coefficients with a df plot: figure; csdfplot(mus,vrs,ps,min(x),max(x)); title( 'DF plot of the adaptivly found mixing coeffients' ); saveas( gcf, 'prob_8_14_df', 'epsc' ); fprintf('before EM'); fprintf('mus=\n'); fprintf('%10.5f',mus); fprintf('\n'); % plot the density before EM: xe = linspace( min(x), max(x) ); fhat = zeros(size(xe)); for i=1:N, fhat = fhat+ps(i)*normpdf(xe,mus(i),sqrt(vrs(i))); end fh=figure(csh); phbem=plot( xe, fhat, '-xr' ); hold on; grid on; %-- % 2) polish the parameters found above with the EM algorithm: %-- max_it = 1e4; tol = 1e-9; c=N; mix_cof = ps; mu = mus; var_mat = vrs; data = x(:); d=1; clear mus; num_it = 1; deltol = tol+1; while( (num_it<=max_it) & (deltol>tol) ) % check for mixture coefficients that shrink to zero (and remove them): % inds = find(mix_cof<=1e-9); if( any(inds) ) fprintf('removing at least the %d mixture coefficient ... \n',inds(1)); mix_cof(inds) = []; mu(inds) = []; var_mat(inds) = []; c = c-length(inds); end mix_cofup = []; muup = []; varup = []; posterior = zeros(n,c); totprob = zeros(n,1); for i=1:c, % <- for each cluster center ... posterior(:,i) = mix_cof(i)*normpdf(data(:),mu(i),sqrt(var_mat(i))); totprob = totprob + posterior(:,i); % <- this is the probability of this datum ... end den = totprob*ones(1,c)+1e-9; posterior = posterior./den; % <- the posteriori probablity datum i comes from cluster j % update the mixing coefficients: mix_cofup = sum(posterior)/n; % update the means: mut = data.' * posterior; MIX = ones(d,1)*mix_cof + 1e-9; muup = mut./(MIX*n); % update the means and the variances for i=1:c, cen_data = data-ones(n,1)*mu(:,i)'; mat = cen_data' * diag(posterior(:,i))*cen_data; varup(i) = mat./(mix_cof(i)*n+1e-9); end % get the tolerances: delvar = max(max(max(abs(varup-var_mat)))); delmu = max(max(abs(muup-mu))); delpi = max(abs(mix_cof-mix_cofup)); deltol = max([delvar,delmu,delpi]); % reset parameters num_it = num_it + 1; mix_cof = mix_cofup; mu = muup; var_mat = varup; %mix_cof, mu, num_it %var_mat, num_it if( any(isnan(mix_cof)) | any(isnan(mu)) | any(isnan(var_mat)) ) pause; end end fprintf('after EM'); fprintf('mu=\n'); fprintf('%10.5f',mu); fprintf('\n'); % plot the density after EM: % xe = linspace( min(x), max(x) ); fhat = zeros(size(xe)); for i=1:c, fhat = fhat+mix_cof(i)*normpdf(xe,mu(i),sqrt(var_mat(i))); end figure(fh); phem=plot( xe, fhat, '-og' ); hold on; grid on; legend( [phbem,phem], {'adaptive mixture density','EM found density'}, 'location', 'best' ); saveas( gcf, 'prob_8_14_compare', 'epsc' );