PandA-2024.02
05_dense_b.parallel.c
Go to the documentation of this file.
1 #include "c_backend_api.h"
2 
3 #ifdef BAMBU_PROFILING
4 extern void __builtin_bambu_time_start();
5 extern void __builtin_bambu_time_stop();
6 #endif
7 
13 
/**
 * Compute a single output element of the dense product.
 *
 * Multiplies element 0 of `placeholder` by the element of `placeholder1`
 * selected by `y_outer_x_outer_fused`, and stores that product (summed onto
 * zero) at the same index of `compute`.
 *
 * @param y_outer_x_outer_fused  index read from `placeholder1` and written in `compute`
 * @param compute                output buffer; only element [y_outer_x_outer_fused] is written
 * @param placeholder            input buffer; only element [0] is read
 * @param placeholder1           input buffer; only element [y_outer_x_outer_fused] is read
 */
__attribute__((noinline))
void kernel(int32_t y_outer_x_outer_fused, float *compute, float* placeholder, float* placeholder1)
{
  /* Both inputs are read before the output slot is touched. */
  float acc = 0.000000e+00f;
  acc += placeholder[0] * placeholder1[y_outer_x_outer_fused];

  /* Reset the slot, then add: keeps the explicit "+ 0" of the generated code
     so rounding and signed-zero results stay the same. */
  float *out = &compute[y_outer_x_outer_fused];
  *out = 0.000000e+00f;
  *out += acc;
}
23 
/**
 * Invoke kernel() once for every index in [0, 64).
 *
 * The loop is annotated with `#pragma omp parallel for`; each iteration
 * passes its own index together with the three buffers straight to kernel().
 *
 * @param compute       output buffer forwarded to kernel()
 * @param placeholder   first input buffer forwarded to kernel()
 * @param placeholder1  second input buffer forwarded to kernel()
 */
__attribute__((noinline))
void parallel(float *compute, float* placeholder, float* placeholder1)
{
  #pragma omp parallel for
  for (int32_t idx = 0; idx < 64; idx++)
  {
    kernel(idx, compute, placeholder, placeholder1);
  }
}
34 
35 __attribute__((noinline))
36 int32_t fused_nn_dense_add( void* args, void* arg_type_ids, int32_t num_args)
37 {
38 
39  void* arg0 = (((TVMValue*)args)[0].v_handle);
40  float* placeholder = (float*)(((TVMArray*)arg0)[0].data);
41 
42  void* arg1 = (((TVMValue*)args)[1].v_handle);
43  float* placeholder1 = (float*)(((TVMArray*)arg1)[0].data);
44 
45  void* arg2 = (((TVMValue*)args)[2].v_handle);
46  float* placeholder2 = (float*)(((TVMArray*)arg2)[0].data);
47 
48  void* arg3 = (((TVMValue*)args)[3].v_handle);
49  float* T_add = (float*)(((TVMArray*)arg3)[0].data);
50 
51  float compute[64];
52 
53  parallel(compute, placeholder, placeholder1);
54  int32_t ax1;
55  for (ax1 = 0; ax1 < 64; ++ax1) {
56  T_add[ax1] = (compute[ax1] + placeholder2[ax1]);
57  }
58  return 0;
59 }
60 
61 int32_t fused_nn_dense_add_wrapper(float* placeholder, float* placeholder1, float* placeholder2, float* T_add)
62 {
63  int32_t res;
64  a0[0].data = placeholder;
65  a1[0].data = placeholder1;
66  a2[0].data = placeholder2;
67  a3[0].data = T_add;
68  param[0].v_handle = a0;
69  param[1].v_handle = a1;
70  param[2].v_handle = a2;
71  param[3].v_handle = a3;
72 #ifdef BAMBU_PROFILING
74 #endif
75 
76  res = fused_nn_dense_add(param, 0, 0);
77 
78 #ifdef BAMBU_PROFILING
80 #endif
81 
82  return res;
83 }
TVM_DLL int32_t fused_nn_dense_add(void *args, void *arg_type_ids, int32_t num_args)
Definition: 04_dense_a.cc:7
void __builtin_bambu_time_start()
void * v_handle
int compute(int a, int b, int c, int d, int e, int f, int g, int expected)
Definition: main.c:10
TVMArray a0[1]
TVMArray a3[1]
Union type of values being passed through API and function calls.
void * data
The opaque data pointer points to the allocated data. This will be a CUDA device pointer or a cl_mem handle in OpenCL.
Definition: dlpack.h:131
void kernel(unsigned vertex, unsigned *p_Qnext, unsigned *Qnext_N, unsigned *map)
Definition: bfs.c:44
__attribute__((noinline))
Convert the given fixedpt number to a decimal string.
TVMArray a1[1]
TVMArray a2[1]
TVMValue param[4]
Plain C Tensor object, does not manage memory.
Definition: dlpack.h:111
int32_t fused_nn_dense_add_wrapper(float *placeholder, float *placeholder1, float *placeholder2, float *T_add)
void __builtin_bambu_time_stop()

Generated on Mon Feb 12 2024 13:02:50 for PandA-2024.02 by doxygen 1.8.13