#!/usr/bin/env python # import pylab as pl import numpy as np; from numpy import mean, zeros, ones, hstack, vstack, asarray, asmatrix, nan, var, cov import scipy; from scipy.misc import comb import itertools; from itertools import combinations import collections; from collections import Counter vs = asarray( [15,9,27,21,6] ) ws = asarray( [22,8,34,24,12] ) N = len(vs) s = 3 # number of samples to draw from the v values to compute \bar{Y} t = 2 # number of samples to draw from the w values to compute \bar{Z} # Extract all possible values for \bar{Y} the mean of "s" and \bar{Z} the mean of "t" samples: possible_Y_bar_values = [] possible_Z_bar_values = [] for v_items in combinations( range(N), s ): w_items = tuple( set( range(N) ) - set( v_items ) ) possible_Y_bar_values.append( mean(vs[asarray(v_items)]) ) possible_Z_bar_values.append( mean(ws[asarray(w_items)]) ) # Lets compute the possible values of the variable Z_bar - Y_bar (each has a probability of 1/nchoose(5,3) = 1/nchoose(5,2) = 1/10 of happening): # possible_Z_bar_minus_Y_bar = asarray( possible_Z_bar_values ) - asarray( possible_Y_bar_values ) print possible_Z_bar_minus_Y_bar # Compute some statistics of vs and ws (Needed for Problem 2 in this section): var(vs) var(ws) cov( vstack((vs,ws)), bias=1 ) var( asarray(vs) - asarray(ws) ) # Compute some statistics of \bar{Z} - \bar{Y} (needed for problem 4 (i)) in this section): # print "Expectation of Zbar - Ybar= ", mean( possible_Z_bar_minus_Y_bar ) print "Expectation^2 of Zbar - Ybar= ", mean( possible_Z_bar_minus_Y_bar**2 ) print "Variance of Zbar - Ybar= ", mean( possible_Z_bar_minus_Y_bar**2 ) - mean( possible_Z_bar_minus_Y_bar )**2 # For part (iii) compare the two distributions by plotting them: # if False: # For design B: pl.stem( possible_Z_bar_minus_Y_bar, (1./10)*ones( len(possible_Z_bar_minus_Y_bar) ), markerfmt='bo', linefmt='b-' ) pl.hold('on') # For design A: diff_values = asarray( list( set( ws-vs ) ) ) prob_values = asarray( [ 1, 1, 1, 2 ] )/5. pl.stem( diff_values, prob_values, markerfmt='ro', linefmt='r-' ) ax = pl.gca() ax.set_ylim( [0, .45] ) ax.set_xlabel( 'value' ) ax.set_ylabel( 'probability' ) pl.show()