void kernel(int32_t y_outer_x_outer_fused,
            float* compute,
            float* placeholder,
            float* placeholder1) {
  float compute1[1]; /* scalar accumulator; its declaration is elided in the source listing */
  compute1[0] = 0.000000e+00f;
  compute1[0] = (compute1[0] + (placeholder[0] * placeholder1[y_outer_x_outer_fused]));
  compute[y_outer_x_outer_fused] = 0.000000e+00f;
  compute[y_outer_x_outer_fused] = (compute[y_outer_x_outer_fused] + compute1[0]);
}
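Each call to kernel produces one element of the dense output; since the reduction axis has length one, each output is a single multiply of the one input element by the corresponding weight. For reference, the 64 calls issued by parallel below are equivalent to the following plain loop (a sketch; the shapes, a 1-element input and 64 weights, are inferred from the listing rather than stated in it):

void dense_reference(float* out, const float* x, const float* w) {
  /* out[j] = x[0] * w[j], matching kernel's zero-init-then-accumulate
     for each fused index j */
  for (int j = 0; j < 64; ++j) {
    out[j] = x[0] * w[j];
  }
}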
void parallel(float* compute,
              float* placeholder,
              float* placeholder1) {
  int32_t y_outer_x_outer_fused;
  #pragma omp parallel for
  for (y_outer_x_outer_fused = 0; y_outer_x_outer_fused < 64; ++y_outer_x_outer_fused) {
    kernel(y_outer_x_outer_fused, compute, placeholder, placeholder1);
  }
}
TVM_DLL int32_t fused_nn_dense_add(void* args, void* arg_type_ids, int32_t num_args) {
  void* arg0 = (((TVMValue*)args)[0].v_handle);
  float* placeholder = (float*)(((TVMArray*)arg0)[0].data);
  void* arg1 = (((TVMValue*)args)[1].v_handle);
  float* placeholder1 = (float*)(((TVMArray*)arg1)[0].data);
  void* arg2 = (((TVMValue*)args)[2].v_handle);
  float* placeholder2 = (float*)(((TVMArray*)arg2)[0].data);
  void* arg3 = (((TVMValue*)args)[3].v_handle);
  float* T_add = (float*)(((TVMArray*)arg3)[0].data);
  float compute[64]; /* intermediate dense output; its declaration is elided in the source listing */
  int32_t ax1;
  parallel(compute, placeholder, placeholder1);
  for (ax1 = 0; ax1 < 64; ++ax1) {
    T_add[ax1] = (compute[ax1] + placeholder2[ax1]);
  }
  return 0; /* return value elided in the source listing */
}
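The argument unpacking above follows TVM's packed-call convention: args is an array of TVMValue unions, each carrying an opaque v_handle that points to a tensor descriptor whose first field is the raw data pointer. A minimal stand-in for the two types, trimmed to the fields this listing touches (the full definitions live in the TVM runtime headers; TVMValue is the union type of values passed through API and function calls, and TVMArray is a plain C tensor object that does not manage memory):

#include <stdint.h>

typedef union {
  int64_t v_int64;
  double  v_float64;
  void*   v_handle;   /* opaque handle, here a pointer to a TVMArray */
} TVMValue;

typedef struct {
  void* data;         /* opaque pointer to the allocated data buffer */
  /* device, ndim, dtype, shape, strides, byte_offset omitted in this sketch */
} TVMArray;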
int32_t fused_nn_dense_add_wrapper(float* placeholder, float* placeholder1,
                                   float* placeholder2, float* T_add) {
  /* TVMValue/TVMArray packing of the four arguments; the declarations of
     a0..a3 and of the args array are elided in the source listing */
  a0[0].data = placeholder;
  a1[0].data = placeholder1;
  a2[0].data = placeholder2;
  a3[0].data = T_add; /* elided in the source listing; inferred from the pattern above */
#ifdef BAMBU_PROFILING
  __builtin_bambu_time_start();
#endif
  /* invocation of fused_nn_dense_add(...) elided in the source listing */
#ifdef BAMBU_PROFILING
  __builtin_bambu_time_stop();
#endif
}
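A minimal host-side driver for the wrapper could look as follows (a sketch, not part of the generated code; buffer sizes follow the loop bounds above, and the expected value is just multiply-then-add arithmetic):

#include <stdio.h>

int main(void) {
  float x[1] = {2.0f};   /* dense input (placeholder) */
  float w[64];           /* dense weights (placeholder1) */
  float b[64];           /* per-element bias (placeholder2) */
  float y[64];           /* result (T_add) */
  int j;
  for (j = 0; j < 64; ++j) { w[j] = (float)j; b[j] = 1.0f; }

  fused_nn_dense_add_wrapper(x, w, b, y);

  printf("y[3] = %f\n", y[3]);   /* 2.0f * 3.0f + 1.0f = 7.0f */
  return 0;
}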