function [data,class_labels] = load_leukemia(do_preprocessing, do_column_ordering, do_zscore)
% LOAD_LEUKEMIA - Loads the leukemia dataset and applies some preprocessing if desired
%
% [data,class_labels] = load_leukemia(do_preprocessing, do_column_ordering, do_zscore)
%
% Inputs:
%   do_preprocessing   - if true, the optional steps below are considered;
%                        if false, the raw (transposed) data is returned
%                        and the other two flags are ignored.
%   do_column_ordering - if true (and do_preprocessing is true), the columns
%                        (features) are reordered in descending order of
%                        their sample standard deviation.
%   do_zscore          - if true (and do_preprocessing is true), every column
%                        is centered by its mean and scaled by its standard
%                        deviation.
%
% Outputs:
%   data         - [n,p] matrix, one row per sample, one column per feature.
%   class_labels - [n,1] vector; 1 for 'ALL' samples, 2 for 'AML' samples,
%                  0 for any sample whose cancertype is neither.
%
% Side effects:
%   Adds '../../Code/eda_data' to the MATLAB path and loads the
%   "leukemia" data file (expected to define the variables
%   "leukemia" and "cancertype").
%
% Written by:
% --
% John L. Weatherwax                2005-08-14
%
% email: wax@alum.mit.edu
%
% Please send comments and especially bug reports to the
% above email address.
%
%-----

addpath( '../../Code/eda_data' );

load leukemia;

% the file stores features along rows; transpose so each row is one sample:
data = leukemia.';

% n=number of samples=number of patients=72;
% p=number of features=number of gene expressions=50
%
[n,p] = size(data);

% encode the two cancer types as integer class labels (case-insensitive match):
ind_ALL = find(strcmpi(cancertype,'ALL'));
ind_AML = find(strcmpi(cancertype,'AML'));
class_labels = zeros(n,1);
class_labels(ind_ALL)=1;
class_labels(ind_AML)=2;

if( do_preprocessing )
  % this data comes from a natural process ... might be gaussian ... apply the zscore transformation
  f_mu = mean( data );
  f_sd = std( data );

  % lets sort the columns in descending order by the magnitude of the variance/standard deviation:
  if( do_column_ordering )
    % ascending sort followed by a flip gives the descending permutation
    [unused_sorted_sd,indx] = sort( f_sd );
    indx = fliplr(indx);
    data = data(:,indx);
    f_mu = f_mu(indx);
    % keep the standard deviations aligned with the permuted columns
    % (the original code assigned this to a misspelled variable, leaving
    %  f_sd in the old column order and corrupting the z-score below)
    f_sd = f_sd(indx);
  end

  % compute the z-score of this data:
  if( do_zscore )
    % a constant column has zero standard deviation; dividing by 1 instead
    % maps it to all zeros rather than NaN
    f_scale = f_sd;
    f_scale( f_scale == 0 ) = 1;
    data_t = ( data - repmat( f_mu, [n,1] ) ) ./ repmat( f_scale, [n,1] );
    data = data_t;
  end
end