1 #include "tvm/runtime/c_runtime_api.h" 2 #include "tvm/runtime/c_backend_api.h" 8 void* arg0 = (((
TVMValue*)args)[0].v_handle);
9 int32_t arg0_code = (( int32_t*)arg_type_ids)[0];
10 void* arg1 = (((
TVMValue*)args)[1].v_handle);
11 int32_t arg1_code = (( int32_t*)arg_type_ids)[1];
12 void* arg2 = (((
TVMValue*)args)[2].v_handle);
13 int32_t arg2_code = (( int32_t*)arg_type_ids)[2];
14 void* arg3 = (((
TVMValue*)args)[3].v_handle);
15 int32_t arg3_code = (( int32_t*)arg_type_ids)[3];
16 float* placeholder = (
float*)(((
TVMArray*)arg0)[0].data);
18 int64_t* arg0_strides = (int64_t*)(((
TVMArray*)arg0)[0].strides);
19 int32_t dev_type = (((
TVMArray*)arg0)[0].ctx.device_type);
20 int32_t dev_id = (((
TVMArray*)arg0)[0].ctx.device_id);
21 float* placeholder1 = (
float*)(((
TVMArray*)arg1)[0].data);
22 int64_t* arg1_shape = (int64_t*)(((
TVMArray*)arg1)[0].shape);
23 int64_t* arg1_strides = (int64_t*)(((
TVMArray*)arg1)[0].strides);
24 float* placeholder2 = (
float*)(((
TVMArray*)arg2)[0].data);
25 int64_t* arg2_shape = (int64_t*)(((
TVMArray*)arg2)[0].shape);
26 int64_t* arg2_strides = (int64_t*)(((
TVMArray*)arg2)[0].strides);
27 float* T_add = (
float*)(((
TVMArray*)arg3)[0].data);
28 int64_t* arg3_shape = (int64_t*)(((
TVMArray*)arg3)[0].shape);
29 int64_t* arg3_strides = (int64_t*)(((
TVMArray*)arg3)[0].strides);
30 if (!(arg0_strides ==
NULL)) {
32 if (!(arg1_strides ==
NULL)) {
34 if (!(arg2_strides ==
NULL)) {
36 if (!(arg3_strides ==
NULL)) {
39 for (int32_t y_outer_x_outer_fused = 0; y_outer_x_outer_fused < 10; ++y_outer_x_outer_fused) {
41 (( float16*)(compute1 + 0))[0] = ((float16)(0.000000e+00f, 0.000000e+00f, 0.000000e+00f, 0.000000e+00f, 0.000000e+00f, 0.000000e+00f, 0.000000e+00f, 0.000000e+00f, 0.000000e+00f, 0.000000e+00f, 0.000000e+00f, 0.000000e+00f, 0.000000e+00f, 0.000000e+00f, 0.000000e+00f, 0.000000e+00f));
42 for (int32_t
k = 0;
k < 49; ++
k) {
43 (( float16*)(compute1 + 0))[0] = ((( float16*)(compute1 + 0))[0] + ((( float16*)(placeholder + (
k * 16)))[0] * (( float16*)(placeholder1 + ((y_outer_x_outer_fused * 784) + (
k * 16))))[0]));
45 compute[y_outer_x_outer_fused] = 0.000000e+00f;
46 compute[y_outer_x_outer_fused] = (compute[y_outer_x_outer_fused] + compute1[0]);
47 compute[y_outer_x_outer_fused] = (compute[y_outer_x_outer_fused] + compute1[1]);
48 compute[y_outer_x_outer_fused] = (compute[y_outer_x_outer_fused] + compute1[2]);
49 compute[y_outer_x_outer_fused] = (compute[y_outer_x_outer_fused] + compute1[3]);
50 compute[y_outer_x_outer_fused] = (compute[y_outer_x_outer_fused] + compute1[4]);
51 compute[y_outer_x_outer_fused] = (compute[y_outer_x_outer_fused] + compute1[5]);
52 compute[y_outer_x_outer_fused] = (compute[y_outer_x_outer_fused] + compute1[6]);
53 compute[y_outer_x_outer_fused] = (compute[y_outer_x_outer_fused] + compute1[7]);
54 compute[y_outer_x_outer_fused] = (compute[y_outer_x_outer_fused] + compute1[8]);
55 compute[y_outer_x_outer_fused] = (compute[y_outer_x_outer_fused] + compute1[9]);
56 compute[y_outer_x_outer_fused] = (compute[y_outer_x_outer_fused] + compute1[10]);
57 compute[y_outer_x_outer_fused] = (compute[y_outer_x_outer_fused] + compute1[11]);
58 compute[y_outer_x_outer_fused] = (compute[y_outer_x_outer_fused] + compute1[12]);
59 compute[y_outer_x_outer_fused] = (compute[y_outer_x_outer_fused] + compute1[13]);
60 compute[y_outer_x_outer_fused] = (compute[y_outer_x_outer_fused] + compute1[14]);
61 compute[y_outer_x_outer_fused] = (compute[y_outer_x_outer_fused] + compute1[15]);
63 for (int32_t ax1 = 0; ax1 < 10; ++ax1) {
64 T_add[ax1] = (compute[ax1] + placeholder2[ax1]);
73 void* arg0 = (((
TVMValue*)args)[0].v_handle);
74 int32_t arg0_code = (( int32_t*)arg_type_ids)[0];
75 void* arg1 = (((
TVMValue*)args)[1].v_handle);
76 int32_t arg1_code = (( int32_t*)arg_type_ids)[1];
77 float* placeholder = (
float*)(((
TVMArray*)arg0)[0].data);
79 int64_t* arg0_strides = (int64_t*)(((
TVMArray*)arg0)[0].strides);
80 int32_t dev_type = (((
TVMArray*)arg0)[0].ctx.device_type);
81 int32_t dev_id = (((
TVMArray*)arg0)[0].ctx.device_id);
82 float* tensor = (
float*)(((
TVMArray*)arg1)[0].data);
83 int64_t* arg1_shape = (int64_t*)(((
TVMArray*)arg1)[0].shape);
84 int64_t* arg1_strides = (int64_t*)(((
TVMArray*)arg1)[0].strides);
85 if (!(arg0_strides ==
NULL)) {
87 if (!(arg1_strides ==
NULL)) {
92 tensor1[0] = -3.402823e+38f;
93 for (int32_t k1 = 0; k1 < 10; ++k1) {
94 float _1 = tensor1[0];
95 float _2 = placeholder[k1];
96 tensor1[0] = ((_1) > (_2) ? (_1) : (_2));
98 for (int32_t ax1 = 0; ax1 < 10; ++ax1) {
99 tensor2[ax1] = expf((placeholder[ax1] - tensor1[0]));
101 tensor3[0] = 0.000000e+00f;
102 for (int32_t k2 = 0; k2 < 10; ++k2) {
103 tensor3[0] = (tensor3[0] + tensor2[k2]);
105 for (int32_t ax11 = 0; ax11 < 10; ++ax11) {
106 tensor[ax11] = (tensor2[ax11] / tensor3[0]);
int compute(int a, int b, int c, int d, int e, int f, int g, int expected)
TVMValue: Union type of values being passed through the API and function calls.
static const uint32_t k[]
TVMArray (DLTensor): Plain C Tensor object; it does not manage the memory it points to.
TVM_DLL int32_t fused_nn_softmax(void *args, void *arg_type_ids, int32_t num_args)
TVM_DLL int32_t fused_nn_dense_add(void *args, void *arg_type_ids, int32_t num_args)