/* parallel_jacobi.c -- parallel implementation of Jacobi's method
 *     for solving the linear system Ax = b.  Uses a block distribution
 *     of the input vectors and a block-row distribution of A.
 *
 * Input:
 *     n: order of the system.  n need not be divisible by p (the number
 *         of processes); all spillover is handled by the last process (p-1).
 *     tol: convergence tolerance
 *     max_iter: maximum number of iterations
 *     A: coefficient matrix
 *     b: right-hand side of the system
 *
 * Output:
 *     x: the solution if the method converges
 *     max_iter: if the method fails to converge
 *
 * Notes:
 *     1. A should be strictly diagonally dominant in
 *        order to ensure convergence.
 *     2. A, x, and b are statically allocated.
 *
 * See Chap 10, pp. 220 & ff in PPMPI.
 *
 * Written by:
 *    --
 *    John L. Weatherwax 2006-01-24
 *
 * email: wax@alum.mit.edu
 *
 * Please send comments and especially bug reports to the
 * above email address.
 *
 *-----
 */
#include <stdio.h>
#include "mpi.h"
#include <math.h>

#define Swap(x,y) {float* temp; temp = x; x = y; y = temp;}
#define MAX_DIM 100

typedef float MATRIX_T[MAX_DIM][MAX_DIM];

int Parallel_jacobi(
        MATRIX_T  A_local    /* in  */,
        float     x_local[]  /* out */,
        float     b_local[]  /* in  */,
        int       n          /* in  */,
        float     tol        /* in  */,
        int       max_iter   /* in  */,
        int       p          /* in  */,
        int       my_rank    /* in  */);

void Read_matrix(char* prompt, MATRIX_T A_local, int n,
         int my_rank, int p);
void Read_vector(char* prompt, float x_local[], int n,
         int my_rank, int p);
void Print_matrix(char* title, MATRIX_T A_local, int n,
         int my_rank, int p);
void Print_vector(char* title, float x_local[], int n,
         int my_rank, int p);

int main(int argc, char* argv[]) {
    int       p;
    int       my_rank;
    MATRIX_T  A_local;
    float     x_local[MAX_DIM];
    float     b_local[MAX_DIM];
    int       n;
    float     tol;
    int       max_iter;
    int       converged;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &p);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    if (my_rank == 0) {
        /* printf("Enter n, tolerance, and max number of iterations\n"); */
        scanf("%d %f %d", &n, &tol, &max_iter);
        /* if (n % p != 0)
               printf("n not divisible by p...invoking code designed for spillover...\n"); */
    }
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&tol, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&max_iter, 1, MPI_INT, 0, MPI_COMM_WORLD);

    /* Read_matrix("Enter the matrix", A_local, n, my_rank, p); */
    Read_matrix("", A_local, n, my_rank, p);
    /* Print_matrix("Input matrix is:", A_local, n, my_rank, p); */

    /* Read_vector("Enter the right-hand side", b_local, n, my_rank, p); */
    Read_vector("", b_local, n, my_rank, p);
    /* Print_vector("Input vector is:", b_local, n, my_rank, p); */

    converged = Parallel_jacobi(A_local, x_local, b_local, n,
                    tol, max_iter, p, my_rank);
    if (converged)
        Print_vector("The solution is", x_local, n, my_rank, p);
    else if (my_rank == 0)
        printf("Failed to converge in %d iterations\n", max_iter);

    MPI_Finalize();
    return 0;
}  /* main */
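/*********************************************************************/
/* For reference only: a minimal serial sketch of the Jacobi sweep
 * that Parallel_jacobi below distributes across processes.
 * Serial_jacobi is an illustrative name added for exposition; it is
 * not part of the original PPMPI code.  Each sweep computes
 *     x_i = (b_i - sum_{j != i} A[i][j]*x_old[j]) / A[i][i]
 * and stops when successive iterates differ by less than tol.       */
static int Serial_jacobi(MATRIX_T A, float x[], float b[],
                         int n, float tol, int max_iter) {
    float x_old[MAX_DIM];
    float diff;
    int   i, j, iter;

    for (i = 0; i < n; i++) x[i] = b[i];   /* initial guess x = b */
    for (iter = 0; iter < max_iter; iter++) {
        for (i = 0; i < n; i++) x_old[i] = x[i];
        for (i = 0; i < n; i++) {
            x[i] = b[i];
            for (j = 0; j < n; j++)
                if (j != i) x[i] -= A[i][j]*x_old[j];
            x[i] /= A[i][i];
        }
        diff = 0.0;                        /* ||x - x_old||_2 */
        for (i = 0; i < n; i++)
            diff += (x[i] - x_old[i])*(x[i] - x_old[i]);
        if (sqrt(diff) < tol) return 1;    /* converged */
    }
    return 0;                              /* failed to converge */
}  /* Serial_jacobi */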
/*********************************************************************/
/* Return 1 if the iteration converged, 0 otherwise.                 */
/* MATRIX_T is a 2-dimensional array.                                */
int Parallel_jacobi(
        MATRIX_T  A_local    /* in  */,
        float     x_local[]  /* out */,
        float     b_local[]  /* in  */,
        int       n          /* in  */,
        float     tol        /* in  */,
        int       max_iter   /* in  */,
        int       p          /* in  */,
        int       my_rank    /* in  */) {

    int     i_local, i_global, i, j, loc_up_lim;
    int     n_bar, rem;
    int     iter_num;
    float   x_temp1[MAX_DIM];
    float   x_temp2[MAX_DIM];
    float*  x_old;
    float*  x_new;
    float   Distance(float x[], float y[], int n);

    n_bar = n/p;
    rem   = n % p;

    /* Compute the local upper limit index: the last process (p-1)
     * also handles the rem spillover rows. */
    loc_up_lim = ((my_rank != p-1) ? n_bar : n_bar + rem);
    /* printf("my_rank=%d; loc_up_lim = %d\n", my_rank, loc_up_lim);
       Print_vector("b_local", b_local, n, my_rank, p);
       printf("rem = %d\n", rem); */

    /* Initialize x_temp1 */
    MPI_Allgather(b_local, n_bar, MPI_FLOAT, x_temp1,
        n_bar, MPI_FLOAT, MPI_COMM_WORLD);
    if (rem != 0) {
        /* Get the additional elements held in process p-1 into x_temp1: */
        if (my_rank == p-1) {
            /* First fill processor p-1's data into x_temp1 */
            for (i = 0; i < rem; i++)
                x_temp1[n_bar*p + i] = b_local[n_bar + i];
        }
        /* Then distribute the spillover elements to every process */
        MPI_Bcast(&(x_temp1[n_bar*p]), rem, MPI_FLOAT, p-1,
            MPI_COMM_WORLD);
    }

    x_new = x_temp1;
    x_old = x_temp2;
    iter_num = 0;
    do {
        iter_num++;

        /* Interchange x_old and x_new */
        Swap(x_old, x_new);

        for (i_local = 0; i_local < loc_up_lim; i_local++) {
            i_global = i_local + my_rank*n_bar;
            x_local[i_local] = b_local[i_local];
            for (j = 0; j < i_global; j++)
                x_local[i_local] = x_local[i_local] -
                    A_local[i_local][j]*x_old[j];
            for (j = i_global+1; j < n; j++)
                x_local[i_local] = x_local[i_local] -
                    A_local[i_local][j]*x_old[j];
            x_local[i_local] = x_local[i_local]/
                A_local[i_local][i_global];
        }

        /* Collect the updated blocks into x_new on every process */
        MPI_Allgather(x_local, n_bar, MPI_FLOAT, x_new,
            n_bar, MPI_FLOAT, MPI_COMM_WORLD);
        if (rem != 0) {
            /* Fill in and distribute process p-1's spillover elements */
            if (my_rank == p-1) {
                for (i = 0; i < rem; i++)
                    x_new[n_bar*p + i] = x_local[n_bar + i];
            }
            MPI_Bcast(&(x_new[n_bar*p]), rem, MPI_FLOAT, p-1,
                MPI_COMM_WORLD);
        }
    } while ((iter_num < max_iter) &&
             (Distance(x_new, x_old, n) >= tol));

    if (Distance(x_new, x_old, n) < tol)
        return 1;
    else
        return 0;
}  /* Parallel_jacobi */

/*********************************************************************/
float Distance(float x[], float y[], int n) {
    int   i;
    float sum = 0.0;

    for (i = 0; i < n; i++) {
        sum = sum + (x[i] - y[i])*(x[i] - y[i]);
    }
    return sqrt(sum);
}  /* Distance */

/*********************************************************************/
void Read_matrix(
        char*     prompt   /* in  */,
        MATRIX_T  A_local  /* out */,
        int       n        /* in  */,
        int       my_rank  /* in  */,
        int       p        /* in  */) {

    int         i, j;
    MATRIX_T    temp;
    int         n_bar, rem;
    MPI_Status  status;

    n_bar = n/p;
    rem   = n % p;

    /* Fill dummy entries in temp with zeroes */
    for (i = 0; i < n; i++)
        for (j = n; j < MAX_DIM; j++)
            temp[i][j] = 0.0;

    if (my_rank == 0) {
        printf("%s\n", prompt);
        for (i = 0; i < n; i++)
            for (j = 0; j < n; j++)
                scanf("%f", &temp[i][j]);
    }
    MPI_Scatter(temp, n_bar*MAX_DIM, MPI_FLOAT, A_local,
        n_bar*MAX_DIM, MPI_FLOAT, 0, MPI_COMM_WORLD);
    if (rem != 0) {
        /* Any remaining rows go to process p-1: */
        if (my_rank == 0) {
            MPI_Send(&(temp[n_bar*p][0]), rem*MAX_DIM, MPI_FLOAT,
                p-1, 0, MPI_COMM_WORLD);
        }
        if (my_rank == p-1) {
            MPI_Recv(&(A_local[n_bar][0]), rem*MAX_DIM, MPI_FLOAT,
                0, 0, MPI_COMM_WORLD, &status);
        }
    }
}  /* Read_matrix */

/*********************************************************************/
void Read_vector(
        char*  prompt    /* in  */,
        float  x_local[] /* out */,
        int    n         /* in  */,
        int    my_rank   /* in  */,
        int    p         /* in  */) {

    int         i;
    float       temp[MAX_DIM];
    int         n_bar, rem;
    MPI_Status  status;

    n_bar = n/p;
    rem   = n % p;

    if (my_rank == 0) {
        printf("%s\n", prompt);
        for (i = 0; i < n; i++)
            scanf("%f", &temp[i]);
    }
    MPI_Scatter(temp, n_bar, MPI_FLOAT, x_local, n_bar,
        MPI_FLOAT, 0, MPI_COMM_WORLD);
    if (rem != 0) {
        /* Any remaining elements go to the last process, p-1: */
        if (my_rank == 0) {
            MPI_Send(&(temp[n_bar*p]), rem, MPI_FLOAT, p-1, 1,
                MPI_COMM_WORLD);
        }
        if (my_rank == p-1) {
            MPI_Recv(&(x_local[n_bar]), rem, MPI_FLOAT, 0, 1,
                MPI_COMM_WORLD, &status);
        }
    }
}  /* Read_vector */
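/*********************************************************************/
/* Illustration only (not part of the original PPMPI code): with the
 * block distribution used above, process q < p-1 owns global rows
 * q*n_bar .. (q+1)*n_bar - 1 (n_bar = n/p), and process p-1 also
 * owns the rem = n % p spillover rows n_bar*p .. n-1.  Owner_of_row
 * is a hypothetical helper that makes this mapping explicit; it
 * assumes n >= p so that n_bar > 0.                                  */
static int Owner_of_row(int i_global, int n, int p) {
    int n_bar = n/p;
    int q     = i_global/n_bar;
    return (q < p) ? q : p-1;   /* spillover rows belong to p-1 */
}  /* Owner_of_row */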
/*********************************************************************/
void Print_matrix(
        char*     title    /* in */,
        MATRIX_T  A_local  /* in */,
        int       n        /* in */,
        int       my_rank  /* in */,
        int       p        /* in */) {

    int         i, j;
    MATRIX_T    temp;
    int         n_bar, rem;
    MPI_Status  status;

    n_bar = n/p;
    rem   = n % p;

    MPI_Gather(A_local, n_bar*MAX_DIM, MPI_FLOAT, temp,
        n_bar*MAX_DIM, MPI_FLOAT, 0, MPI_COMM_WORLD);
    if (rem != 0) {
        /* Send the remaining rows on process p-1 to process 0 */
        if (my_rank == p-1) {
            MPI_Send(&(A_local[n_bar][0]), rem*MAX_DIM, MPI_FLOAT,
                0, 2, MPI_COMM_WORLD);
        }
        if (my_rank == 0) {
            MPI_Recv(&(temp[n_bar*p][0]), rem*MAX_DIM, MPI_FLOAT,
                p-1, 2, MPI_COMM_WORLD, &status);
        }
    }

    if (my_rank == 0) {
        printf("%s\n", title);
        for (i = 0; i < n; i++) {
            for (j = 0; j < n; j++)
                printf("%10.6f ", temp[i][j]);
            printf("\n");
        }
    }
}  /* Print_matrix */

/*********************************************************************/
void Print_vector(
        char*  title     /* in */,
        float  x_local[] /* in */,
        int    n         /* in */,
        int    my_rank   /* in */,
        int    p         /* in */) {

    int         i;
    float       temp[MAX_DIM];
    int         n_bar, rem;
    MPI_Status  status;

    n_bar = n/p;
    rem   = n % p;

    MPI_Gather(x_local, n_bar, MPI_FLOAT, temp, n_bar,
        MPI_FLOAT, 0, MPI_COMM_WORLD);
    if (rem != 0) {
        /* Send the remaining elements on process p-1 to process 0 */
        if (my_rank == p-1) {
            MPI_Send(&(x_local[n_bar]), rem, MPI_FLOAT, 0, 3,
                MPI_COMM_WORLD);
        }
        if (my_rank == 0) {
            MPI_Recv(&(temp[n_bar*p]), rem, MPI_FLOAT, p-1, 3,
                MPI_COMM_WORLD, &status);
        }
    }

    if (my_rank == 0) {
        printf("%s\n", title);
        for (i = 0; i < n; i++)
            printf("%12.8f ", temp[i]);
        printf("\n");
    }
}  /* Print_vector */
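/*********************************************************************/
/* Example usage (an assumption, not from the original source: any
 * standard MPI toolchain such as MPICH or Open MPI; the file names
 * below are illustrative):
 *
 *     mpicc -o parallel_jacobi parallel_jacobi.c -lm
 *     mpirun -np 4 ./parallel_jacobi < input.txt
 *
 * where input.txt holds n, tol, and max_iter, followed by the n x n
 * entries of A in row-major order and then the n entries of b.       */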