% % epage 241 % % remiss data ... get statistical model, hypothesis test for equality of the means, % test with the p-value approach. % % Written by: % -- % John L. Weatherwax 2008-02-20 % % email: wax@alum.mit.edu % % Please send comments and especially bug reports to the % above email address. % %----- clear all; close all; clc; addpath('../../Code/CSTool'); % data seems to be in the second column of two matrices: control and mp % the first column seems to be a count: load remiss; control = control(:,2); % extract just the data mp = mp(:,2); %-- % Try to study this data... %-- %plot a frequency histogram: % figure; hold on; [N,h]=hist(control); bar(h,N,0.25,'r'); [N,h]=hist(mp); bar(h,N,0.25,'g'); xlabel('remission time'); ylabel('count'); title( 'remission times' ); saveas( gcf, 'prob_6_10_rem_times_freq_hist', 'epsc' ); %plot a density histogram: % figure; hold on; [N,h]=hist(control); bar(h,N/(h(2)-h(1))/sum(N),0.25,'r'); [N,h]=hist(mp); bar(h,N/(h(2)-h(1))/sum(N),0.25,'g'); xlabel('remission time'); ylabel('count'); title( 'remission times' ); saveas( gcf, 'prob_6_10_rem_times_dens_hist', 'epsc' ); % this data looks normal. Consider qq plots for each class: figure; qqplot( control ); title( 'remission times of the control group' ); saveas( gcf, 'prob_6_10_qq_control', 'epsc' ); figure; qqplot( mp ); title( 'remission times of the experimental group' ); saveas( gcf, 'prob_6_10_qq_mp', 'epsc' ); % the size of our sample: ns = length(control); % get our sample statistic: t_0 = mean(mp) M = 1e6; Tm = zeros(1,M); for ii=1:M, % draw a random sample under H_0 ... sample with replacement from the data set "control": inds = unidrnd(ns,1,ns); xs = control(inds); Tm(ii) = mean(xs); end p_value = length(find(Tm>=t_0))/M; fprintf('the p-value = %10.5f\n',p_value);