#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
/* Exercise 6.10.2, see Chap. 6, p. 109 in PPMPI, by John Weatherwax.

   Here we read from a file a matrix stored in row-major order and
   distribute it across the individual processes in block-column
   order.  Each process creates a vector of ones and we compute the
   global matrix-vector product.
*/
#define MAX_ORDER 100

typedef float LOCAL_MATRIX_T[MAX_ORDER][MAX_ORDER];

int main( int argc, char* argv[] ){
    int            my_rank;
    int            p;
    LOCAL_MATRIX_T local_A;
    float          local_x[MAX_ORDER];
    float          global_b[MAX_ORDER];
    int            n;
    int            local_n;

    /* declarations: */
    void Read_Row_Major_Matrix(LOCAL_MATRIX_T local_A, float local_x[],
                               int *local_n, int *n, int my_rank, int p);
    void Print_Block_Col_Order(LOCAL_MATRIX_T local_A, int local_n, int n,
                               int my_rank, int p);
    void Mult_Mat_by_Vect(LOCAL_MATRIX_T local_A, float local_x[], int n,
                          int local_n, int my_rank, float global_b[]);
    void PrintResult(int n, int my_rank, float global_b[]);

    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&p);
    MPI_Comm_rank(MPI_COMM_WORLD,&my_rank);

    Read_Row_Major_Matrix(local_A,local_x,&local_n,&n,my_rank,p);
    Mult_Mat_by_Vect(local_A,local_x,n,local_n,my_rank,global_b);
    Print_Block_Col_Order(local_A,local_n,n,my_rank,p);
    PrintResult(n,my_rank,global_b);

    MPI_Finalize();
    return 0;
}

/**********************************************************************/
void Mult_Mat_by_Vect(
         LOCAL_MATRIX_T  local_A    /* in  */,
         float           local_x[]  /* in  */,
         int             n          /* in  */,
         int             local_n    /* in  */,
         int             my_rank    /* in  */,
         float           global_b[] /* out */){

    float local_b[MAX_ORDER];
    int   i, j, xIndx;

    /* Sum the local contributions to the matrix-vector product.
       Process my_rank owns the block column holding global columns
       local_n*my_rank ... local_n*(my_rank+1)-1. */
    for( i=0; i < n; i++ ){
        local_b[i] = 0.0;
        for( j=0; j < local_n; j++ ){
            xIndx = local_n*my_rank + j;
            local_b[i] = local_b[i] + local_A[i][j]*local_x[xIndx];
        }
    }

    /* Reduce all partial-sum results ... this may not be the best
       way ... perhaps a derived type would be better. */
    for( i=0; i < n; i++ ){
        MPI_Allreduce(&(local_b[i]),&(global_b[i]),1,MPI_FLOAT,
                      MPI_SUM,MPI_COMM_WORLD);
    }
}
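/**********************************************************************/
/* A note on the reduction loop above: since local_b and global_b are
   contiguous buffers, the n single-element collectives can be
   collapsed into one MPI_Allreduce with count n -- no derived type is
   needed for this particular pattern.  The helper below is a minimal
   sketch added for illustration only; it is not called anywhere in
   this program. */
void Reduce_Partial_Products(
         float  local_b[]  /* in  */,
         float  global_b[] /* out */,
         int    n          /* in  */){

    /* one collective call sums all n partial entries across processes */
    MPI_Allreduce(local_b,global_b,n,MPI_FLOAT,MPI_SUM,MPI_COMM_WORLD);
}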
/**********************************************************************/
void PrintResult(
         int    n          /* in */,
         int    my_rank    /* in */,
         float  global_b[] /* in */){

    int i;

    if( my_rank==0 ){
        for( i=0; i < n; i++ ){
            printf("%7.3f ",global_b[i]);
        }
        printf("\n");
    }
}

/**********************************************************************/
void Print_Block_Col_Order(
         LOCAL_MATRIX_T  local_A /* in */,
         int             local_n /* in */,
         int             n       /* in */,
         int             my_rank /* in */,
         int             p       /* in */){

    int          i, j, bri, bci, start_row, start_col, tag;
    float        local_A0[MAX_ORDER][MAX_ORDER];
    MPI_Datatype loc_sub_mat_t;
    MPI_Status   status;

    /* Create an MPI type used in sending data to process 0: local_n
       blocks of local_n floats, separated by a stride of MAX_ORDER
       floats (one row of the statically sized array), i.e. one
       local_n x local_n submatrix.
       Q: WAS THE DECLARATION OF THIS TYPE (BELOW) SUFFICIENT? */
    MPI_Type_vector(local_n,local_n,MAX_ORDER,MPI_FLOAT,&loc_sub_mat_t);
    MPI_Type_commit(&loc_sub_mat_t);

    /* Every process (including 0) sends its p local blocks to process
       0 before process 0 posts any receives, so this step relies on
       the MPI implementation buffering these small messages.  The tag
       encodes the block's global coordinates: block row bri, block
       column my_rank. */
    for( bri=0; bri < p; bri++ ){
        start_row = bri*local_n;
        start_col = 0;
        tag       = my_rank + p * bri;
        MPI_Send(&(local_A[start_row][start_col]),1,loc_sub_mat_t,0,tag,
                 MPI_COMM_WORLD);
    }

    /* Print these suckers... */
    if( my_rank==0 ){
        for( bri=0; bri < p; bri++ ){
            for( bci=0; bci < p; bci++ ){
                start_col = bci*local_n;
                start_row = bri*local_n;
                tag       = bci + p * bri;
                MPI_Recv(&(local_A0[start_row][start_col]),1,loc_sub_mat_t,
                         MPI_ANY_SOURCE,tag,MPI_COMM_WORLD,&status);
            }
        }
        /* All submatrices received ... as process 0 I'll do my thing
           and print the assembled matrix. */
        for( i=0; i < n; i++ ){
            for( j=0; j < n; j++ ){
                printf("%7.2f ",local_A0[i][j]);
            }
            printf("\n");
        }
        printf("\n");
    }
    MPI_Type_free(&loc_sub_mat_t);
}

/**********************************************************************/
void Read_Row_Major_Matrix(
         LOCAL_MATRIX_T  local_A   /* out */,
         float           local_x[] /* out */,
         int            *local_n   /* out */,
         int            *n         /* out */,
         int             my_rank   /* in  */,
         int             p         /* in  */){

    int   i, j, bri, bci;
    int   send_start_row, send_start_col;
    int   recv_start_row, recv_start_col;
    char  mat_file[] = "prob_6.10.1.b.dat";
    FILE *fp;
    float local_A0[MAX_ORDER][MAX_ORDER];
    MPI_Datatype loc_sub_mat_t;
    MPI_Status   status;

    /* Use the rank==0 process as gatekeeper for all input data...
       this means we read all the data first and then send it out. */
    if( my_rank==0 ){
        if( (fp=fopen(mat_file,"r")) == NULL ){
            printf("Error opening file...exiting\n");
            /* abort the whole job; finalizing and exiting on rank 0
               alone would leave the other ranks blocked in the
               broadcasts below */
            MPI_Abort(MPI_COMM_WORLD,1);
        }
        /* read in the matrix size (n x n): */
        fscanf(fp,"%d",n);
        printf("n = %d\n",*n);
        /* compute the local block row/column size: */
        (*local_n) = (*n)/p;
        printf("local_n = %d\n",*local_n);
        /* read the matrix (row-major order) into local storage on
           process 0: */
        for( i=0; i < (*n); i++ ){
            for( j=0; j < (*n); j++ ){
                fscanf(fp,"%f",&local_A0[i][j]);
            }
        }
        fclose(fp);
        /* for debugging ... print the matrix */
        for( i=0; i < (*n); i++ ){
            for( j=0; j < (*n); j++ ){
                printf("%f ",local_A0[i][j]);
            }
            printf("\n");
        }
    } /* end data input processing for my_rank==0 */

    /* process 0 now has all the data; broadcast the sizes to all
       OTHER processes (MPI_INT, not the Fortran type MPI_INTEGER, is
       the datatype matching a C int)... */
    MPI_Bcast(n,1,MPI_INT,0,MPI_COMM_WORLD);
    MPI_Bcast(local_n,1,MPI_INT,0,MPI_COMM_WORLD);

    /* Create an MPI type used in sending data to the other processes */
    MPI_Type_vector((*local_n),(*local_n),MAX_ORDER,MPI_FLOAT,&loc_sub_mat_t);
    MPI_Type_commit(&loc_sub_mat_t);

    /* Issue sends and receives.  Global block (bri,bci) belongs to
       block COLUMN bci, so it is sent to process bci, which stores it
       at local rows bri*(*local_n); this matches the column indexing
       used in Mult_Mat_by_Vect.  Process 0's send to itself is posted
       immediately before the matching receive and relies on the MPI
       implementation buffering one block. */
    for( bri=0; bri < p; bri++ ){
        for( bci=0; bci < p; bci++ ){
            send_start_row = bri*(*local_n);
            send_start_col = bci*(*local_n);
            if( my_rank==0 ){
                MPI_Send(&(local_A0[send_start_row][send_start_col]),1,
                         loc_sub_mat_t,bci,0,MPI_COMM_WORLD);
            }
            recv_start_row = send_start_row;
            recv_start_col = 0;
            if( my_rank==bci ){
                MPI_Recv(&(local_A[recv_start_row][recv_start_col]),1,
                         loc_sub_mat_t,0,0,MPI_COMM_WORLD,&status);
            }
        }
    }
    MPI_Type_free(&loc_sub_mat_t);

    /* At this point the input matrix is distributed (in block
       columns) across all processes. */

    /* Create a vector of all ones in each local process */
    for( i=0; i < (*n); i++ )
        local_x[i] = 1.0;
}
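/**********************************************************************/
/* For reference, a hypothetical input file consistent with the fscanf
   calls in Read_Row_Major_Matrix: the order n first, followed by the
   n*n matrix entries in row-major order.  This example uses n = 4, so
   it divides evenly for p = 2 or p = 4 processes; the actual contents
   of "prob_6.10.1.b.dat" are not reproduced here.

       4
        1.0  2.0  3.0  4.0
        5.0  6.0  7.0  8.0
        9.0 10.0 11.0 12.0
       13.0 14.0 15.0 16.0
*/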