/* "sort distribute" -- parallel implementation of a simple sorting routine. * The probability distribution of the keys to be sorted is known, and * each process is filled with input from the command line. * * Specifically, we generate n uniformly random floats between 0.0 and 100.0 * % jot -r -s " " 100 0.0 100.0 * * If the maximum key value is 100.0 with p processors: * processor 0 gets all keys from 0.0 < key < 100.0/p * processor 1 gets all keys from 100.0/p < key < 2*(100.0/p) * ... * processor "my_rank" gets all keys from my_rank*(100.0/p) < key < (my_rank+1)*(100.0/p) * ... * processor p-1 gets all keys from (p-1)*(100.0/p) < key < 100.0 * * Input: * keys: the elements to sort (input in from process 0) * * Output: * keys_sorted: local array of sorted elements * * See Chap 10, pp. 242 in PPMPI. * * Written by: * -- * John L. Weatherwax 2006-01-29 * * email: wax@alum.mit.edu * * Please send comments and especially bug reports to the * above email address. * *----- */ #include #include #include #include "mpi.h" #define MAX_DIM 100 void Read_vector(char* prompt, float **keys_local, float *max_key, int* n, int my_rank, int p); void Print_vector(char* title, float *keys_local, int n_local, int n, int my_rank, int p); void Local_Sort(float *keys_local, int n_keys); void Shuffle_Keys(int p, int my_rank, float max_key, float* keys_local, int n_local, float** new_keys_local, int *new_n_local); int key_cmpr_fn(int* p, int* q); int main(int argc, char* argv[]) { int p,my_rank,n,n_bar,n_rem,n_tol,n_local,new_n_local,ii; float *keys_local,*new_keys_local,max_key=-1.0; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &p); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); /* Fills (and allocates memory) for the array keys_local */ Read_vector("Enter the numbers (integers) to be sorted:", &keys_local, &max_key, &n, my_rank, p); n_bar = n / p; n_rem = n % p; /* Compute the number of elements in each local array: */ n_local = ( my_rank != p-1 ? n_bar : n_bar+n_rem ); Print_vector("Vector read was:", keys_local, n_local, n, my_rank, p); Local_Sort(keys_local,n_local); /*Print_vector("Local vector sorted is:", keys_local, n_local, n, my_rank, p);*/ /* Find which keys go to which processors and pass (filling new_keys_local) */ Shuffle_Keys(p,my_rank,max_key,keys_local,n_local,&new_keys_local,&new_n_local); /*Print_vector("new_keys_local is:", new_keys_local, new_n_local, n, my_rank, p);*/ Local_Sort(new_keys_local,new_n_local); Print_vector("Sorted values are:",new_keys_local,new_n_local,n,my_rank,p); free(new_keys_local); MPI_Finalize(); } /* main */ void Shuffle_Keys(int p, int my_rank, float max_key, float* keys_local, int n_local, float** new_keys_local, int *new_n_local){ /* 1) Computes the number of elements this processor needs to send to the other processors and the location where each stretch of elements starts 2) Executes MPI_Alltoallv to support the movement of keys among processors 3) Free old key locations keys_local and allocate new key locations new_keys_local */ int pos,ii; int *send_counts,*send_displacements,*recv_counts,*recv_displacements; send_counts = (int*) malloc(p*sizeof(int)); for( ii=0; ii < p; ii++ ) send_counts[ii]=0; send_displacements = (int*) malloc(p*sizeof(int)); for( ii=0; ii < p; ii++ ) send_displacements[ii]=0; pos=0; /* initial processor to send keys to */ send_counts[pos]=0; /* initialization */ send_displacements[pos]=0; /* initialization */ for( ii=0; ii < n_local; ii++ ){ /* if needed, update the "bin" that this local key goes into: */ if( keys_local[ii] > (pos+1)*(max_key/p) ){ pos++; send_counts[pos]=0; send_displacements[pos]=send_displacements[pos-1]+send_counts[pos-1]; } /* debugging check: */ if( pos > p-1 ){ fprintf(stderr,"pos too large\n"); exit(1); } send_counts[pos]++; } /*printf("my_rank=%d; send_counts[0]=%d, send_counts[1]=%d\n",my_rank,send_counts[0],send_counts[1]);*/ /* Pass this information to each processor */ recv_counts = (int*) malloc(p*sizeof(int)); recv_displacements = (int*) malloc(p*sizeof(int)); MPI_Alltoall(send_counts,1,MPI_INT,recv_counts,1,MPI_INT,MPI_COMM_WORLD); /*printf("my_rank=%d; recv_counts[0]=%d, recv_counts[1]=%d\n",my_rank,recv_counts[0],recv_counts[1]);*/ /* Compute the total number of elements this processor will recieve from all other processors */ *new_n_local = recv_counts[0]; for( ii=1; ii < p; ii++ ){ (*new_n_local)+=recv_counts[ii]; } /* Allocate space for the keys to be sent here ... */ *new_keys_local = (float *) malloc((*new_n_local)*sizeof(float)); /* Fill the recv_displacements array */ recv_displacements[0]=0; for( ii=1; ii < p; ii++ ){ recv_displacements[ii]=recv_displacements[ii-1]+recv_counts[ii-1]; } MPI_Alltoallv(keys_local, send_counts, send_displacements, MPI_FLOAT, *new_keys_local, recv_counts, recv_displacements, MPI_FLOAT, MPI_COMM_WORLD); free(send_counts); free(send_displacements); free(recv_counts); free(recv_displacements); free(keys_local); }; void Local_Sort(float *keys_local, int n_local){ qsort(&(keys_local[0]), n_local, sizeof(int),(int(*)(const void*,const void*))(key_cmpr_fn)); }; int key_cmpr_fn(int* p, int* q){ if( *p < *q ) return -1; else if( *p == *q ) return 0; else return +1; } /*********************************************************************/ void Read_vector( char* prompt /* in */, float** keys_local /* out */, float* max_key /* out */, int* n /* out */, int my_rank /* in */, int p /* in */) { int i; float temp[MAX_DIM]; int n_bar, n_rem, n_local; MPI_Status status; /* Get the global number of elements to be sorted */ if( my_rank==0 ){ printf("%s\n", prompt); scanf("%d", n); if( *n > MAX_DIM ){ perror("Requested array size is too large...exiting\n"); exit(1); } } MPI_Bcast(n,1,MPI_INT,0,MPI_COMM_WORLD); n_bar = (*n) / p; n_rem = (*n) % p; /* Compute the number of elements in each local array: */ n_local = (my_rank != p-1 ? n_bar : n_bar+n_rem ); /* Create space for initial elements */ *keys_local = (float*) malloc(n_local*sizeof(float)); if( (*keys_local)==0 ){ perror("Malloc failed...exiting\n"); exit(1); } /* Read in all the keys */ if (my_rank == 0) { for (i = 0; i < *n; i++){ scanf("%f", &temp[i]); if( temp[i] > (*max_key) ) *max_key = temp[i]; /* compute the largest key */ } } MPI_Bcast(max_key,1,MPI_FLOAT,0,MPI_COMM_WORLD); /* Send groupings of the data to be sorted to each process: */ MPI_Scatter(temp, n_bar, MPI_FLOAT, *keys_local, n_bar, MPI_FLOAT, 0, MPI_COMM_WORLD); /* Would this call work as opposed to the two if statements below??? */ /*MPI_Scatter(temp, n_local, MPI_FLOAT, keys_local, n_local, MPI_FLOAT, 0, MPI_COMM_WORLD);*/ /* Any remaining data goes to the last process "p-1": */ if( (n_rem != 0) && (my_rank == 0) ) { MPI_Send(&(temp[n_bar*p]), n_rem, MPI_FLOAT, p-1, 0, MPI_COMM_WORLD); } if( (n_rem != 0) && (my_rank == p-1) ) { MPI_Recv(&((*keys_local)[n_bar]), n_rem, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, &status); } } /* Read_vector */ /*********************************************************************/ void Print_vector( char* title /* in */, float *keys_local /* in */, int n_local /* in */, int n /* in */, int my_rank /* in */, int p /* in */) { int i,j,n_print,n_bar,n_rem; float temp[MAX_DIM]; MPI_Status status; const int NELTS_TAG=3; const int ELTS_TAG=4; /* Send the number of elements: */ MPI_Send(&n_local,1,MPI_INT,0,NELTS_TAG,MPI_COMM_WORLD); /* Send the elements themselves: */ MPI_Send(keys_local,n_local,MPI_FLOAT,0,ELTS_TAG,MPI_COMM_WORLD); if (my_rank == 0) { printf("%s\n", title); for (i = 0; i < p; i++){ MPI_Recv(&n_print,1,MPI_INT,i,NELTS_TAG,MPI_COMM_WORLD,&status); /*printf("n_print = %d\n",n_print); */ MPI_Recv(&temp[0],n_print,MPI_FLOAT,i,ELTS_TAG,MPI_COMM_WORLD,&status); for( j=0; j < n_print; j++) printf("%4.1f ", temp[j]); } printf("\n"); } } /* Print_vector */