PandA-2024.02
add.c
Go to the documentation of this file.
/* Number of accelerator slots; also used as the OpenMP thread count. */
#define NUM_ACCELS 5
/* Number of elements in the input array that is summed. */
#define ARRAY_SIZE 10000
/*
 * Elements handled per accelerator slot.
 *
 * The expansion is parenthesized so the macro behaves as a single value in
 * any surrounding expression (for example `x / OPS_PER_ACCEL`). Integer
 * division is used, so if ARRAY_SIZE were not a multiple of NUM_ACCELS the
 * remainder elements would not belong to any slot; the current values
 * divide evenly.
 */
#define OPS_PER_ACCEL (ARRAY_SIZE / NUM_ACCELS)
4 
5 #include <stdio.h>
6 #include "add.h"
7 
9 
10 __attribute__((noinline))
11 void add (int accelnum)
12 {
13  int startidx = accelnum * OPS_PER_ACCEL;
14  int endidx = (accelnum+1) * OPS_PER_ACCEL;
15  int sum=0;
16  int i;
17  for (i=startidx; i<endidx; i++)
18  {
19  sum += array[i];
20  }
21  output[accelnum] = sum;
22 }
23 
/*
 * Call add() once for every index in the half-open range [start, end).
 *
 * The loop carries an OpenMP parallel-for directive requesting NUM_ACCELS
 * threads, with the loop index private to each thread. The loop is kept in
 * plain `for (init; test; increment)` form because the directive applies to
 * the for statement that follows it.
 */
__attribute__((noinline))
void add_for(int start, int end)
{
    int accel;

    #pragma omp parallel for num_threads(NUM_ACCELS) private(accel)
    for (accel = start; accel < end; ++accel)
    {
        add(accel);
    }
}
33 
34 int
35 main ()
36 {
37  int sum=0;
38  int i;
39 
40  add_for(0, NUM_ACCELS);
41 
42  //combine results
43  for (i=0; i<NUM_ACCELS; i++) {
44  sum += output[i];
45  }
46 
47  return sum;
48 }
__attribute__((noinline))
Definition: add.c:10
#define OPS_PER_ACCEL
Definition: add.c:3
int sum
Definition: dotproduct.h:3
#define NUM_ACCELS
Definition: add.c:1
int output[NUM_ACCELS]
Definition: add.c:8
void add(int accelnum)
Definition: add.c:11
int main()
Definition: add.c:35
int array[ARRAY_SIZE]
Definition: add.h:1

Generated on Mon Feb 12 2024 13:02:49 for PandA-2024.02 by doxygen 1.8.13