/* parallel_jacobi.c -- parallel implementation of Jacobi's method
 *     for solving the linear system Ax = b.  Uses block distribution
 *     of the input vectors and block-row distribution of A.
 *
 *     The matrix is filled randomly using drand48.
 *
 * Input:
 *     n: order of system (not necessarily divisible by p, the number of
 *        processors; any spillover is handled by the last processor, p-1)
 *     tol: convergence tolerance
 *     max_iter: maximum number of iterations
 *     A: coefficient matrix
 *     b: right-hand side of system
 *
 * Output:
 *     x: the solution if the method converges
 *     max_iter: if the method fails to converge
 *
 * Notes:
 *     1. A should be strictly diagonally dominant in
 *        order to ensure convergence.
 *     2. A, x, and b are statically allocated.
 *
 * See Chap 10, pp. 220 & ff in PPMPI.
 * (A small serial reference sketch of the Jacobi update follows main() below.)
 *
 * Note: In many places in this code I issue MPI_Send(...,rem,...).  Before each
 * such call I check that rem != 0.  I would think that MPI_Send should work with
 * a zero-size count argument, but since I have heard otherwise I guard the call
 * with an if statement before executing the MPI_Send.
 *
 * Written by:
 *    --
 * John L. Weatherwax                2006-01-29
 *
 * email: wax@alum.mit.edu
 *
 * Please send comments and especially bug reports to the
 * above email address.
 *
 *-----
 */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mpi.h"

#define Swap(x,y) {float* temp; temp = x; x = y; y = temp;}
#define MAX_DIM 100

typedef float MATRIX_T[MAX_DIM][MAX_DIM];

int Parallel_jacobi(
        MATRIX_T  A_local   /* in  */,
        float     x_local[] /* out */,
        float     b_local[] /* in  */,
        int       n         /* in  */,
        float     tol       /* in  */,
        int       max_iter  /* in  */,
        int       p         /* in  */,
        int       my_rank   /* in  */);

void Read_matrix(char* prompt, MATRIX_T A_local, int n, int my_rank, int p);
void Read_vector(char* prompt, float x_local[], int n, int my_rank, int p);
void Print_matrix(char* title, MATRIX_T A_local, int n, int my_rank, int p);
void Print_vector(char* title, float x_local[], int n, int my_rank, int p);
void Gen_matrix(MATRIX_T A_local /* out */, int n /* in */,
                int my_rank /* in */, int p /* in */);
void Gen_vector(float x_local[] /* out */, int n /* in */,
                int my_rank /* in */, int p /* in */);


int main(int argc, char* argv[]) {
    int       p;
    int       my_rank;
    MATRIX_T  A_local;
    float     x_local[MAX_DIM];
    float     b_local[MAX_DIM];
    int       n;
    float     tol;
    int       max_iter;
    int       converged;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &p);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    if (my_rank == 0) {
        /* printf("Enter n, tolerance, and max number of iterations\n"); */
        scanf("%d %f %d", &n, &tol, &max_iter);
        /* if (n % p != 0)
             printf("n not divisible by p...invoking code designed for spillover...\n"); */
    }
    MPI_Bcast(&n,        1, MPI_INT,   0, MPI_COMM_WORLD);
    MPI_Bcast(&tol,      1, MPI_FLOAT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&max_iter, 1, MPI_INT,   0, MPI_COMM_WORLD);

    srand48((long int) p);   /* Seed the random number generator: */

    Gen_matrix(A_local, n, my_rank, p);
    Print_matrix("The random matrix GENERATED is:", A_local, n, my_rank, p);

    Gen_vector(b_local, n, my_rank, p);
    Print_vector("The random RHS GENERATED is:", b_local, n, my_rank, p);

    converged = Parallel_jacobi(A_local, x_local, b_local, n, tol,
                                max_iter, p, my_rank);

    if (converged)
        Print_vector("The solution is", x_local, n, my_rank, p);
    else if (my_rank == 0)
        printf("Failed to converge in %d iterations\n", max_iter);

    MPI_Finalize();
    return 0;
}  /* main */
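/*********************************************************************/
/* For reference: a minimal *serial* sketch of the update that
 * Parallel_jacobi distributes across processes,
 *
 *     x_i^(new) = ( b_i - sum_{j != i} a_ij * x_j^(old) ) / a_ii
 *
 * This helper is illustrative only and is not called anywhere in the
 * program.  The parallel routine below performs the same update on each
 * process's block of rows and then exchanges the updated pieces with
 * MPI_Allgather (plus an explicit send/receive for the spillover rows
 * held by process p-1). */
static void Serial_jacobi_step(
        MATRIX_T A       /* in  */,
        float    b[]     /* in  */,
        float    x_old[] /* in  */,
        float    x_new[] /* out */,
        int      n       /* in  */) {
    int i, j;

    for (i = 0; i < n; i++) {
        /* Start from b_i, subtract the off-diagonal terms, divide by a_ii: */
        x_new[i] = b[i];
        for (j = 0; j < n; j++)
            if (j != i)
                x_new[i] = x_new[i] - A[i][j]*x_old[j];
        x_new[i] = x_new[i]/A[i][i];
    }
}  /* Serial_jacobi_step */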
/*********************************************************************/
/* Return 1 if iteration converged, 0 otherwise */
/* MATRIX_T is a 2-dimensional array            */
int Parallel_jacobi(
        MATRIX_T  A_local   /* in  */,
        float     x_local[] /* out */,
        float     b_local[] /* in  */,
        int       n         /* in  */,
        float     tol       /* in  */,
        int       max_iter  /* in  */,
        int       p         /* in  */,
        int       my_rank   /* in  */) {
    int        i_local, i_global, i, j, loc_up_lim;
    int        n_bar, rem;
    int        iter_num;
    float      x_temp1[MAX_DIM];
    float      x_temp2[MAX_DIM];
    float*     x_old;
    float*     x_new;
    MPI_Status status;
    float Distance(float x[], float y[], int n);

    n_bar = n / p;
    rem   = n % p;

    /* Compute the local upper limit index (the last process also owns the
       rem spillover rows): */
    loc_up_lim = ((my_rank != p-1) ? n_bar : n_bar + rem);

    /* printf("my_rank=%d; loc_up_lim = %d\n", my_rank, loc_up_lim);
       Print_vector("b_local", b_local, n, my_rank, p);
       printf("rem = %d\n", rem); */

    /* Initialize x_temp1 */
    MPI_Allgather(b_local, n_bar, MPI_FLOAT, x_temp1, n_bar, MPI_FLOAT,
                  MPI_COMM_WORLD);
    if (rem != 0) {
        /* Get the additional elements held in process p-1 into x_temp1: */
        if (my_rank == p-1) {
            /* First fill processor p-1's data into x_temp1 ... */
            for (i = 0; i < rem; i++)
                x_temp1[n_bar*p + i] = b_local[n_bar + i];
            /* ... then send these rem elements to every other process. */
            for (i = 0; i < p-1; i++)
                MPI_Send(&(b_local[n_bar]), rem, MPI_FLOAT, i, 0, MPI_COMM_WORLD);
        } else {
            MPI_Recv(&(x_temp1[n_bar*p]), rem, MPI_FLOAT, p-1, 0,
                     MPI_COMM_WORLD, &status);
        }
    }

    x_new = x_temp1;
    x_old = x_temp2;
    iter_num = 0;
    do {
        iter_num++;

        /* Interchange x_old and x_new */
        Swap(x_old, x_new);

        /* Jacobi update for this process's rows (process p-1 also updates
           its rem spillover rows): */
        for (i_local = 0; i_local < loc_up_lim; i_local++) {
            i_global = i_local + my_rank*n_bar;
            x_local[i_local] = b_local[i_local];
            for (j = 0; j < i_global; j++)
                x_local[i_local] = x_local[i_local] - A_local[i_local][j]*x_old[j];
            for (j = i_global+1; j < n; j++)
                x_local[i_local] = x_local[i_local] - A_local[i_local][j]*x_old[j];
            x_local[i_local] = x_local[i_local]/A_local[i_local][i_global];
        }

        /* Collect the first n_bar updated components from every process ... */
        MPI_Allgather(x_local, n_bar, MPI_FLOAT, x_new, n_bar, MPI_FLOAT,
                      MPI_COMM_WORLD);

        /* ... and the rem spillover components held by process p-1: */
        if (rem != 0) {
            if (my_rank == p-1) {
                for (i = 0; i < rem; i++)
                    x_new[n_bar*p + i] = x_local[n_bar + i];
                for (i = 0; i < p-1; i++)
                    MPI_Send(&(x_local[n_bar]), rem, MPI_FLOAT, i, 0, MPI_COMM_WORLD);
            } else {
                MPI_Recv(&(x_new[n_bar*p]), rem, MPI_FLOAT, p-1, 0,
                         MPI_COMM_WORLD, &status);
            }
        }
    } while ((iter_num < max_iter) && (Distance(x_new, x_old, n) >= tol));

    if (Distance(x_new, x_old, n) < tol)
        return 1;
    else
        return 0;
}  /* Parallel_jacobi */


/*********************************************************************/
float Distance(float x[], float y[], int n) {
    int   i;
    float sum = 0.0;

    for (i = 0; i < n; i++) {
        sum = sum + (x[i] - y[i])*(x[i] - y[i]);
    }
    return sqrt(sum);
}  /* Distance */


/*********************************************************************/
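/* A short justification of the row-sum trick used in Gen_matrix below:
 * every entry produced by drand48 lies in [0,1), so after overwriting the
 * diagonal entry with the full row sum we have
 *
 *     a_ii = sum_{j=0..n-1} a_ij  >  sum_{j != i} a_ij = sum_{j != i} |a_ij|
 *
 * (strictly, since the original diagonal entry is positive with probability
 * one).  The matrix is therefore strictly diagonally dominant, which is a
 * sufficient condition for Jacobi's method to converge. */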
void Gen_matrix(
        MATRIX_T A_local /* out */,
        int      n       /* in  */,
        int      my_rank /* in  */,
        int      p       /* in  */) {
    int   i, j, global_index;
    float row_sum;
    int   n_bar, rem;

    n_bar = n / p;
    rem   = n % p;

    /* Fill the unused trailing columns of A_local with dummy values: */
    for (i = 0; i < n; i++)
        for (j = n; j < MAX_DIM; j++)
            A_local[i][j] = -1.0;

    /* Fill matrix randomly */
    for (i = 0; i < n_bar; i++)
        for (j = 0; j < n; j++)
            A_local[i][j] = (float) drand48();

    if (rem != 0 && my_rank == p-1) {
        /* Any remaining required rows are given to process "p-1": */
        for (i = 0; i < rem; i++)
            for (j = 0; j < n; j++)
                A_local[n_bar+i][j] = (float) drand48();
    }

    /* To guarantee strict diagonal dominance (and nice convergence
       properties), we overwrite the diagonal elements with the sum of all
       the elements in that row ... we do this now */
    for (i = 0; i < n_bar; i++) {
        row_sum = 0.0;
        for (j = 0; j < n; j++)
            row_sum += A_local[i][j];
        global_index = i + my_rank*n_bar;
        A_local[i][global_index] = row_sum;
    }
    if (rem != 0 && my_rank == p-1) {
        /* ... and the same for the spillover rows assigned to process "p-1"
           (local rows n_bar..n_bar+rem-1, whose global/diagonal index is
           n_bar + i + my_rank*n_bar): */
        for (i = 0; i < rem; i++) {
            row_sum = 0.0;
            for (j = 0; j < n; j++)
                row_sum += A_local[n_bar+i][j];
            global_index = n_bar + i + my_rank*n_bar;
            A_local[n_bar+i][global_index] = row_sum;
        }
    }
}  /* Gen_matrix */


/*********************************************************************/
void Gen_vector(
        float x_local[] /* out */,
        int   n         /* in  */,
        int   my_rank   /* in  */,
        int   p         /* in  */) {
    int i;
    int n_bar, rem;

    n_bar = n/p;
    rem   = n % p;

    for (i = 0; i < n_bar; i++)
        x_local[i] = (float) drand48();

    if (rem != 0 && my_rank == p-1) {
        /* Any remaining required elements are given to process "p-1": */
        for (i = 0; i < rem; i++)
            x_local[n_bar+i] = (float) drand48();
    }
}  /* Gen_vector */


/*********************************************************************/
void Print_matrix(
        char*    title   /* in */,
        MATRIX_T A_local /* in */,
        int      n       /* in */,
        int      my_rank /* in */,
        int      p       /* in */) {
    int        i, j;
    MATRIX_T   temp;
    int        n_bar, rem;
    MPI_Status status;

    n_bar = n/p;
    rem   = n % p;

    MPI_Gather(A_local, n_bar*MAX_DIM, MPI_FLOAT, temp, n_bar*MAX_DIM, MPI_FLOAT,
               0, MPI_COMM_WORLD);
    if (rem != 0 && my_rank == p-1) {
        /* Send the remaining rows on processor p-1 to processor 0 */
        MPI_Send(&(A_local[n_bar][0]), rem*MAX_DIM, MPI_FLOAT, 0, 0, MPI_COMM_WORLD);
    }
    if (rem != 0 && my_rank == 0) {
        MPI_Recv(&(temp[n_bar*p][0]), rem*MAX_DIM, MPI_FLOAT, p-1, 0,
                 MPI_COMM_WORLD, &status);
    }

    if (my_rank == 0) {
        printf("%s\n", title);
        for (i = 0; i < n; i++) {
            for (j = 0; j < n; j++)
                printf("%12.8f ", temp[i][j]);
            printf("\n");
        }
    }
}  /* Print_matrix */


/*********************************************************************/
void Print_vector(
        char* title     /* in */,
        float x_local[] /* in */,
        int   n         /* in */,
        int   my_rank   /* in */,
        int   p         /* in */) {
    int        i;
    float      temp[MAX_DIM];
    int        n_bar, rem;
    MPI_Status status;

    n_bar = n/p;
    rem   = n % p;

    MPI_Gather(x_local, n_bar, MPI_FLOAT, temp, n_bar, MPI_FLOAT, 0,
               MPI_COMM_WORLD);
    if (rem != 0 && my_rank == p-1) {
        /* Send the remaining elements on processor p-1 to processor 0 */
        MPI_Send(&(x_local[n_bar]), rem, MPI_FLOAT, 0, 1, MPI_COMM_WORLD);
    }
    if (rem != 0 && my_rank == 0) {
        MPI_Recv(&(temp[n_bar*p]), rem, MPI_FLOAT, p-1, 1, MPI_COMM_WORLD, &status);
    }

    if (my_rank == 0) {
        printf("%s\n", title);
        for (i = 0; i < n; i++)
            printf("%12.8f ", temp[i]);
        printf("\n");
    }
}  /* Print_vector */