function [data,class_labels] = load_lungB(do_preprocessing, do_column_ordering, do_zscore, dim_reduction_method, proj_dim)
% LOAD_LUNGB - Loads the lungB dataset and applies some
% preprocessing/dimensionality reduction if desired
%
% Usage:
%   [data,class_labels] = load_lungB(do_preprocessing, do_column_ordering, ...
%                                    do_zscore, dim_reduction_method)
%   [data,class_labels] = load_lungB(..., proj_dim)
%
% Inputs:
%   do_preprocessing     - if true, the column ordering / z-score steps
%                          below are enabled; if false both are skipped.
%   do_column_ordering   - if true, reorder the columns (features) by
%                          descending standard deviation.
%   do_zscore            - if true, center each column and scale it to
%                          unit standard deviation.
%   dim_reduction_method - 0: none (data returned with all p columns);
%                          1: PCA on the covariance matrix;
%                          2: projection onto the leading right singular
%                             vectors of the data matrix.
%   proj_dim             - (optional) number of dimensions kept by methods
%                          1 and 2.  Defaults to 20.
%
% Outputs:
%   data         - n x p matrix (method 0) or n x proj_dim matrix
%                  (methods 1 and 2); rows are samples.
%   class_labels - n x 1 vector: 1 for samples labeled 'AD', 2 for samples
%                  labeled 'NL' (case-insensitive), 0 for anything else.
%
% Side effects:
%   Adds '../../Code/eda_data' to the MATLAB path and reads the variables
%   "lungB" and "labB" from the file "lungB".
%
% Written by:
% --
% John L. Weatherwax                2005-08-14
%
% email: wax@alum.mit.edu
%
% Please send comments and especially bug reports to the
% above email address.
%
%-----

if( nargin < 5 || isempty(proj_dim) )
  proj_dim = 20;
end

addpath( '../../Code/eda_data' );

% Load into a struct so that it is explicit which variables come from the file:
S    = load( 'lungB' );
labB = S.labB;

% Stored as features x samples; transpose so that rows are samples.
data = S.lungB.';
% Original author's note: n=number of samples=156; p=number of features=675
[n,p] = size(data);

class_labels = zeros(n,1);
class_labels( strcmpi(labB,'AD') ) = 1;
class_labels( strcmpi(labB,'NL') ) = 2;

% apply some preprocessing to our data ...
%
if( do_preprocessing )
  f_mu = mean( data, 1 );
  f_sd = std( data, 0, 1 );

  % sort the columns in descending order by the magnitude of the standard deviation:
  if( do_column_ordering )
    [f_sd_sorted,indx] = sort( f_sd );
    indx = indx(end:-1:1);
    data = data(:,indx);
    % The per-column statistics must follow the same permutation as the
    % columns, otherwise the z-score below mixes up columns.
    f_mu = f_mu(indx);
    f_sd = f_sd(indx);
  end

  % compute the z-score of this data:
  if( do_zscore )
    % Constant columns have zero standard deviation; divide those by one
    % so that they become all zeros rather than NaN.
    f_scale = f_sd;
    f_scale( f_scale == 0 ) = 1;
    data = ( data - repmat( f_mu, [n,1] ) ) ./ repmat( f_scale, [n,1] );
  end
end

switch dim_reduction_method
 case 0,
  % none ... results in a singular covariance matrix ...
  %
  data_projected = data;
 case 1,
  % use PCA ... following the analysis in prob_2_8_a_lungB.m:
  %
  % we keep the "proj_dim" (default 20) directions of largest variance
  %
  if( proj_dim > p )
    error('proj_dim exceeds the number of available dimensions');
  end
  %M = corrcoef( data ); % Compute the correlation matrix:
  M = cov( data );       % Compute the covariance matrix:
  M = ( M + M.' ) / 2;   % enforce exact symmetry so that eig returns real values
  % Perform PCA on the M matrix:
  [eigvec,eigval] = eig(M);
  % eig does not promise any particular ordering, so order the
  % eigenvectors explicitly by descending eigenvalue:
  [eigval_sorted,order] = sort( diag(eigval) );
  order  = order(end:-1:1);
  eigvec = eigvec(:,order);
  % project our data onto the dimensions with the "proj_dim" largest variances:
  data_projected = data * eigvec(:,1:proj_dim);
 case 2
  % use a SVD based projection approach
  %
  if( proj_dim > min(n,p) )
    error('proj_dim exceeds the number of available dimensions');
  end
  [u,d,v] = svd( data, 'econ' );
  % The right singular vectors are the COLUMNS of v.  Projecting onto the
  % leading proj_dim of them gives data*v(:,1:proj_dim) (= u*d(:,1:proj_dim)).
  % Using rows of v instead would just return data(:,1:proj_dim).
  data_projected = data * v(:,1:proj_dim);
 otherwise
  error('unknown value of dim_reduction_method');
end

data = data_projected;