PandA-2024.02
sort.c
Go to the documentation of this file.
1 /*
2 Implementation based on algorithm described in:
3 A. Danalis, G. Marin, C. McCurdy, J. S. Meredith, P. C. Roth, K. Spafford, V. Tipparaju, and J. S. Vetter.
4 The scalable heterogeneous computing (shoc) benchmark suite.
5 In Proceedings of the 3rd Workshop on General-Purpose Computation on Graphics Processing Units, 2010
6 */
7 
8 #include "sort.h"
9 
10 void local_scan(int bucket[BUCKETSIZE])
11 {
12  int radixID, i, bucket_indx;
13  local_1 : for (radixID=0; radixID<SCAN_RADIX; radixID++) {
14  local_2 : for (i=1; i<SCAN_BLOCK; i++){
15  bucket_indx = radixID*SCAN_BLOCK + i;
16  bucket[bucket_indx] += bucket[bucket_indx-1];
17  }
18  }
19 }
20 
21 void sum_scan(int sum[SCAN_RADIX], int bucket[BUCKETSIZE])
22 {
23  int radixID, bucket_indx;
24  sum[0] = 0;
25  sum_1 : for (radixID=1; radixID<SCAN_RADIX; radixID++) {
26  bucket_indx = radixID*SCAN_BLOCK - 1;
27  sum[radixID] = sum[radixID-1] + bucket[bucket_indx];
28  }
29 }
30 
31 void last_step_scan(int bucket[BUCKETSIZE], int sum[SCAN_RADIX])
32 {
33  int radixID, i, bucket_indx;
34  last_1:for (radixID=0; radixID<SCAN_RADIX; radixID++) {
35  last_2:for (i=0; i<SCAN_BLOCK; i++) {
36  bucket_indx = radixID * SCAN_BLOCK + i;
37  bucket[bucket_indx] = bucket[bucket_indx] + sum[radixID];
38  }
39  }
40 }
41 
42 void init(int bucket[BUCKETSIZE])
43 {
44  int i;
45  init_1 : for (i=0; i<BUCKETSIZE; i++) {
46  bucket[i] = 0;
47  }
48 }
49 
50 void hist(int bucket[BUCKETSIZE], int a[SIZE], int exp)
51 {
52  int blockID, i, bucket_indx, a_indx;
53  blockID = 0;
54  hist_1 : for (blockID=0; blockID<NUMOFBLOCKS; blockID++) {
55  hist_2 : for(i=0; i<4; i++) {
56  a_indx = blockID * ELEMENTSPERBLOCK + i;
57  bucket_indx = ((a[a_indx] >> exp) & 0x3)*NUMOFBLOCKS + blockID + 1;
58  bucket[bucket_indx]++;
59  }
60  }
61 }
62 
63 void update(int b[SIZE], int bucket[BUCKETSIZE], int a[SIZE], int exp)
64 {
65  int i, blockID, bucket_indx, a_indx;
66  blockID = 0;
67 
68  update_1 : for (blockID = 0; blockID < NUMOFBLOCKS; blockID++) {
69  update_2 : for(i=0; i<4; i++) {
70  bucket_indx = ((a[blockID * ELEMENTSPERBLOCK + i] >> exp) & 0x3)*NUMOFBLOCKS + blockID;
71  a_indx = blockID * ELEMENTSPERBLOCK + i;
72  b[bucket[bucket_indx]] = a[a_indx];
73  bucket[bucket_indx]++;
74  }
75  }
76 }
77 
78 void ss_sort(int a[SIZE], int b[SIZE], int bucket[BUCKETSIZE], int sum[SCAN_RADIX]){
79  int exp=0;
80  int valid_buffer=0;
81  #define BUFFER_A 0
82  #define BUFFER_B 1
83 
84  sort_1 : for (exp=0; exp<32; exp+=2) {
85  init(bucket);
86  if (valid_buffer == BUFFER_A) {
87  hist(bucket, a, exp);
88  } else {
89  hist(bucket, b, exp);
90  }
91 
92  local_scan(bucket);
93  sum_scan(sum, bucket);
94  last_step_scan(bucket, sum);
95 
96  if (valid_buffer==BUFFER_A) {
97  update(b, bucket, a, exp);
98  valid_buffer = BUFFER_B;
99  } else {
100  update(a, bucket, b, exp);
101  valid_buffer = BUFFER_A;
102  }
103  }
104  // If trip count is even, buffer A will be valid at the end.
105 }
#define SCAN_RADIX
Definition: sort.h:25
void local_scan(int bucket[BUCKETSIZE])
Definition: sort.c:10
#define SCAN_BLOCK
Definition: sort.h:24
int sum
Definition: dotproduct.h:3
#define BUFFER_A
void last_step_scan(int bucket[BUCKETSIZE], int sum[SCAN_RADIX])
Definition: sort.c:31
void ss_sort(int a[SIZE], int b[SIZE], int bucket[BUCKETSIZE], int sum[SCAN_RADIX])
Definition: sort.c:78
#define BUCKETSIZE
Definition: sort.h:21
#define SIZE
Definition: adpcm.c:775
void init(int bucket[BUCKETSIZE])
Definition: sort.c:42
#define ELEMENTSPERBLOCK
Definition: sort.h:19
#define BUFFER_B
#define NUMOFBLOCKS
Definition: gemm.h:20
void sum_scan(int sum[SCAN_RADIX], int bucket[BUCKETSIZE])
Definition: sort.c:21
void update(int b[SIZE], int bucket[BUCKETSIZE], int a[SIZE], int exp)
Definition: sort.c:63
uint32_t exp
void hist(int bucket[BUCKETSIZE], int a[SIZE], int exp)
Definition: sort.c:50

Generated on Mon Feb 12 2024 13:02:50 for PandA-2024.02 by doxygen 1.8.13