/* Parallel quicksort with MPI: the array is scattered over a power-of-two
   number of processes, partitioned around shared pivots in log2(P) exchange
   rounds, sorted locally on each process, and gathered back on rank 0. */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>

#define N 5E6
#define SEND_COUNT 1
#define SEND_ARRAY 2

int best_of_three(int *a, int n);
void create_random_array(int **array, int n);
int quicksort_one_step(int *a, int upper, int pivot);
void quicksort_local(int *a, int upper);

int main(int argc, char *argv[])
{
    int i, j, n, my_n, space_malloced;
    int my_rank, my_local_rank, num_procs;
    int pivot, count, tmp_count;
    int compare_to, divider, check;
    int *recv_count, *disp;
    int *a, *my_a, *tmp2;
    double t1, t2;
    MPI_Status stat;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);   /* number of processes */

    /* the pairing scheme below requires a power-of-two process count */
    check = num_procs;
    while(check>1) {
        if(check%2!=0) {
            fprintf(stderr, "Number of processes must be a power of 2.\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        check /= 2;
    }

    /* initialize arrays, fill array at random */
    my_n = N/num_procs;
    if(num_procs>1) {
        my_a = malloc(my_n*sizeof(int));
        tmp2 = malloc(my_n*sizeof(int));   /* scratch buffer for exchanges */
    }
    if(my_rank==0)
        create_random_array(&a, my_n*num_procs);

    /* start timing */
    if(my_rank==0)
        t1 = MPI_Wtime();

    if(num_procs>1)
        MPI_Scatter(a, my_n, MPI_INT, my_a, my_n, MPI_INT, 0, MPI_COMM_WORLD);
    else
        my_a = a;

    my_local_rank = my_rank;
    divider = num_procs;
    compare_to = divider-my_rank-1;
    space_malloced = my_n;

    /* The first process of each group chooses a pivot and sends it to the
       rest of the group; the processes at the endpoints exchange elements,
       the processes in the middle exchange elements, and all processes in
       between find a partner in the same mirrored way.  The group is then
       split into two halves, the first process of each half gets local
       rank 0, and this repeats until all neighbouring processes have
       exchanged lists. */
    while(divider>1) {
        my_local_rank = my_rank%divider;

        /* let the first process of the group choose and distribute the pivot */
        if(my_local_rank==0) {
            pivot = my_a[best_of_three(my_a, my_n)];
            for(i = 1; i < divider; i++) {
                MPI_Send(&pivot, 1, MPI_INT, i+my_rank, divider, MPI_COMM_WORLD);
            }
        } else {
            MPI_Recv(&pivot, 1, MPI_INT, MPI_ANY_SOURCE, divider,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }

        /* do one quicksort step on our own part; i is the split point */
        if(my_n>1) {
            i = quicksort_one_step(my_a, my_n, pivot);
        } else {
            i = 0;
        }
        if(i>my_n)
            i = my_n;

        /* exchange elements with the paired process */
        if(compare_to>my_rank) {
            /* lower partner: keep the part below the pivot; first exchange
               counts to check if we need to malloc more space */
            tmp_count = my_n-i;
            MPI_Sendrecv(&tmp_count, 1, MPI_INT, compare_to, SEND_COUNT,
                         &count, 1, MPI_INT, compare_to, SEND_COUNT,
                         MPI_COMM_WORLD, &stat);
            if(i+count>space_malloced) {
                /* allocate new array, copy values, receive values */
                free(tmp2);
                tmp2 = malloc((i+count)*sizeof(int));
                if(tmp2==NULL) {
                    fprintf(stderr, "Could not malloc, exiting.\n");
                    MPI_Abort(MPI_COMM_WORLD, -1);
                }
                memcpy(tmp2, my_a, i*sizeof(int));
                // fprintf(stderr, "%d cmpt %d: sending %d from my_a[%d], receiving %d to tmp2[%d], %d spaces.\n", my_rank, compare_to, tmp_count, i, count, i, i+count);
                MPI_Sendrecv(&my_a[i], tmp_count, MPI_INT, compare_to, SEND_ARRAY,
                             &tmp2[i], count, MPI_INT, compare_to, SEND_ARRAY,
                             MPI_COMM_WORLD, &stat);
                free(my_a);
                my_a = tmp2;
                my_n = i+count;
                space_malloced = my_n;
                /* restore a separate scratch buffer of the new size so tmp2
                   never aliases my_a */
                tmp2 = malloc(space_malloced*sizeof(int));
                if(tmp2==NULL) {
                    fprintf(stderr, "Could not malloc, exiting.\n");
                    MPI_Abort(MPI_COMM_WORLD, -1);
                }
            } else {
                /* receive into the scratch buffer (MPI_Sendrecv needs distinct
                   send and receive buffers), then put it after the kept part */
                MPI_Sendrecv(&my_a[i], tmp_count, MPI_INT, compare_to, SEND_ARRAY,
                             tmp2, count, MPI_INT, compare_to, SEND_ARRAY,
                             MPI_COMM_WORLD, &stat);
                memcpy(&my_a[i], tmp2, count*sizeof(int));
                my_n = i+count;
            }
        } else {
            /* upper partner: keep the part above the pivot; first exchange
               counts to check how much space we need */
            MPI_Sendrecv(&i, 1, MPI_INT, compare_to, SEND_COUNT,
                         &count, 1, MPI_INT, compare_to, SEND_COUNT,
                         MPI_COMM_WORLD, &stat);
            if(my_n-i+count>space_malloced) {
                /* allocate new array, copy values, receive values */
                free(tmp2);
                tmp2 = malloc((my_n-i+count)*sizeof(int));
                if(tmp2==NULL) {
                    fprintf(stderr, "Could not malloc, exiting.\n");
                    MPI_Abort(MPI_COMM_WORLD, -1);
                }
                memcpy(tmp2, &my_a[i], (my_n-i)*sizeof(int));
                // fprintf(stderr, "%d cmpt %d: sending %d from my_a[%d], receiving %d to tmp2[%d], %d spaces.\n", my_rank, compare_to, i, 0, count, my_n-i, my_n-i+count);
                MPI_Sendrecv(my_a, i, MPI_INT, compare_to, SEND_ARRAY,
                             &tmp2[my_n-i], count, MPI_INT, compare_to, SEND_ARRAY,
                             MPI_COMM_WORLD, &stat);
                free(my_a);
                my_n = my_n-i+count;
                space_malloced = my_n;
                my_a = tmp2;
                /* restore a separate scratch buffer of the new size so tmp2
                   never aliases my_a */
                tmp2 = malloc(space_malloced*sizeof(int));
                if(tmp2==NULL) {
                    fprintf(stderr, "Could not malloc, exiting.\n");
                    MPI_Abort(MPI_COMM_WORLD, -1);
                }
            } else {
                /* exchange first: send our low part and receive the partner's
                   part into the scratch buffer before the low part is
                   overwritten */
                MPI_Sendrecv(my_a, i, MPI_INT, compare_to, SEND_ARRAY,
                             tmp2, count, MPI_INT, compare_to, SEND_ARRAY,
                             MPI_COMM_WORLD, &stat);
                /* copy the last part to the start of the array */
                for(j = 0; j < my_n-i; j++)
                    my_a[j] = my_a[j+i];
                /* put the received part right after it */
                memcpy(&my_a[my_n-i], tmp2, count*sizeof(int));
                my_n = my_n-i+count;
            }
        }

        divider /= 2;
        n = my_rank/divider;
        compare_to = (n+1)*divider-my_rank%divider-1;
    }
    MPI_Barrier(MPI_COMM_WORLD);

    /* sort locally */
    quicksort_local(my_a, my_n);

    /* gather the sorted pieces on rank 0; the piece sizes differ, so use
       MPI_Gatherv with per-process counts and displacements */
    recv_count = malloc(num_procs*sizeof(int));
    disp = malloc(num_procs*sizeof(int));
    MPI_Allgather(&my_n, 1, MPI_INT, recv_count, 1, MPI_INT, MPI_COMM_WORLD);
    disp[0] = 0;
    for(i = 1; i < num_procs; i++)
        disp[i] = disp[i-1] + recv_count[i-1];
    MPI_Gatherv(my_a, my_n, MPI_INT, a, recv_count, disp, MPI_INT, 0, MPI_COMM_WORLD);

    if(my_rank==0) {
        t2 = MPI_Wtime();
        printf("Done sorting; elapsed time is %f\n", t2-t1);
    }

    if(num_procs>1) {
        free(my_a);
        free(tmp2);
    }
    if(my_rank==0) {
        free(a);
    }
    free(recv_count);
    free(disp);
    MPI_Finalize();
    return 0;
}

/* median-of-three pivot choice: return the index of the median of the
   first, middle and last element */
int best_of_three(int *a, int n)
{
    if((a[0]>a[n/2] && a[0]<a[n-1]) || (a[0]<a[n/2] && a[0]>a[n-1]))
        return 0;
    if((a[n/2]>a[0] && a[n/2]<a[n-1]) || (a[n/2]<a[0] && a[n/2]>a[n-1]))
        return n/2;
    return n-1;
}

void create_random_array(int **array, int n)
{
    int *arr;
    int i;

    arr = malloc(n*sizeof(int));
    if(arr==NULL) {
        fprintf(stderr, "Could not malloc, exiting.\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    for(i = 0; i < n; i++)
        arr[i] = rand()%n;
    *array = arr;
}

/* one quicksort step, used in the parallel phase: partition a[0..upper-1]
   around pivot and return the index of the first element of the upper part */
int quicksort_one_step(int *a, int upper, int pivot)
{
    int i, j, tmp;

    j = upper-1;
    i = 0;
    while(i <= j) {
        /* the bounds checks keep the scans inside the array when every
           element falls on one side of the pivot */
        while(i < upper && a[i] < pivot)
            i++;
        while(j >= 0 && a[j] > pivot)
            j--;
        if(i <= j) {
            tmp = a[i];
            a[i] = a[j];
            a[j] = tmp;
            i++;
            j--;
        }
    }
    return i;
}

/* recursive quicksort used for the local sort */
void quicksort_local(int *a, int upper)
{
    int i, j, tmp, pivot, piv_index;

    if(upper <= 1)
        return;
    if(upper == 2) {
        if(a[0]>a[1]) {
            tmp = a[1];
            a[1] = a[0];
            a[0] = tmp;
        }
        return;
    }

    piv_index = best_of_three(a, upper);
    pivot = a[piv_index];
    j = upper-1;
    i = 0;
    while(i < j) {
        while(a[i] < pivot)
            i++;
        while(a[j] > pivot)
            j--;
        if(i < j) {
            tmp = a[i];
            a[i] = a[j];
            a[j] = tmp;
            i++;
            j--;
        }
    }
    quicksort_local(&a[0], i);
    quicksort_local(&a[i], upper-i);
}