#
# EPage 374
#
# Written by:
# --
# John L. Weatherwax                2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
# For several sample sizes n, draw n values from 1:n with replacement and
# compare the observed fraction of draws that repeat an earlier value with
# the theoretical value (1-1/n)^n.  A resample of size n that contains k
# distinct values has n-k repeated draws and, equivalently, omits n-k of the
# original elements; each element is omitted with probability (1-1/n)^n, so
# that is the expected fraction.
#
#-----

library(DAAG)
library(MASS)
library(rpart)
library(randomForest)

set.seed(0)

# Number of draws in `samps` that repeat a value already drawn, i.e. the
# sample length minus the number of distinct values it contains.
count_repeated_draws = function( samps ){
  length( samps ) - length( unique( samps ) )
}

ns = c(25,50,100,400,800)

# One slot per sample size, filled in by the loop below.
empirically_repeated = numeric( length(ns) )
expected_repeated = numeric( length(ns) )
for( i in seq_along(ns) ){
  n = ns[i]
  the_samps = sample( 1:n, size=n, replace=TRUE )

  # Determine the number of repeated elements:
  #
  # Counting the distinct values that occur more than once would not match
  # the (1-1/n)^n formula below; the matching quantity is the number of
  # draws that duplicate an earlier draw.
  num_of_dups = count_repeated_draws( the_samps )

  # Probability that a given element is never drawn in n draws.
  expected_number_of_repeats = (1-n^(-1))^n

  empirically_repeated[i] = num_of_dups/n
  expected_repeated[i] = expected_number_of_repeats

  print( sprintf( "sample size= %5d; empricial fraction of repeats= %5.3f; expected number of repeats= %5.3f", n, num_of_dups/n, expected_number_of_repeats ) )
}

# Part 6: observed versus theoretical fraction, one point per sample size.
plot( empirically_repeated, expected_repeated, type='p', xlab="empirically observed fraction of repeats", ylab="expected fraction of repeats" )

# Part 7: gap between the limiting value exp(-1) and (1-1/n)^n, against log(n).
ys = exp(-1) - (1-ns^(-1))^ns
plot( log(ns), ys )