/*
 * NOTE(review): fragment of a rendered source listing. The leading numbers
 * (1, 2, 8, 9, ...) are the listing's own line numbers fused into the text,
 * and the enclosing function header, the bodies of the two stride checks and
 * all closing braces are not visible here. The function name is therefore
 * not shown in this fragment.
 *
 * Visible behaviour: two handles are unpacked from `args`, both are treated
 * as TVMArray pointers, and 64 floats are copied one-to-one from the first
 * array's data buffer into the second array's data buffer.
 */
1 #include "tvm/runtime/c_runtime_api.h" 2 #include "tvm/runtime/c_backend_api.h" 8 void* arg0 = (((
TVMValue*)args)[0].v_handle);
/* Type code stored for argument 0; not read again in the visible lines. */
9 int32_t arg0_code = (( int32_t*)arg_type_ids)[0];
10 void* arg1 = (((
TVMValue*)args)[1].v_handle);
/* Type code stored for argument 1; not read again in the visible lines. */
11 int32_t arg1_code = (( int32_t*)arg_type_ids)[1];
/* Source buffer: the data pointer of the first array, viewed as float. */
12 float* placeholder = (
float*)(((
TVMArray*)arg0)[0].data);
14 int64_t* arg0_strides = (int64_t*)(((
TVMArray*)arg0)[0].strides);
/* Device type and id are read from the first array's ctx field only. */
15 int32_t dev_type = (((
TVMArray*)arg0)[0].ctx.device_type);
16 int32_t dev_id = (((
TVMArray*)arg0)[0].ctx.device_id);
/* Destination buffer: the data pointer of the second array, viewed as float. */
17 float* T_layout_trans = (
float*)(((
TVMArray*)arg1)[0].data);
18 int64_t* arg1_shape = (int64_t*)(((
TVMArray*)arg1)[0].shape);
19 int64_t* arg1_strides = (int64_t*)(((
TVMArray*)arg1)[0].strides);
/*
 * Entered only when the first array carries a non-NULL strides pointer.
 * What happens inside is not visible (listing line 21 is missing).
 * NOTE(review): presumably a compactness check, since a NULL strides pointer
 * conventionally means a compact layout -- confirm against the DLTensor docs.
 */
20 if (!(arg0_strides ==
NULL)) {
/* Same check for the second array; its body is not visible either. */
22 if (!(arg1_strides ==
NULL)) {
/*
 * 8 x 8 iterations; both sides use the same flat index (outer * 8 + inner),
 * so elements 0..63 are copied in order without reordering.
 */
24 for (int32_t ax0_ax1_fused_ax2_fused = 0; ax0_ax1_fused_ax2_fused < 8; ++ax0_ax1_fused_ax2_fused) {
25 for (int32_t ax3 = 0; ax3 < 8; ++ax3) {
26 T_layout_trans[((ax0_ax1_fused_ax2_fused * 8) + ax3)] = placeholder[((ax0_ax1_fused_ax2_fused * 8) + ax3)];
/*
 * NOTE(review): fragment of a rendered source listing. The leading numbers
 * (36, 37, ...) are the listing's own line numbers fused into the text; the
 * enclosing function header, the bodies of the two stride checks and all
 * closing braces are not visible here, so the function name is not shown.
 *
 * Visible behaviour: two handles are unpacked from `args`, both are treated
 * as TVMArray pointers, and 128 floats are copied from the first array's
 * data buffer into the second array's data buffer with a permuted index.
 */
36 void* arg0 = (((
TVMValue*)args)[0].v_handle);
/* Type code stored for argument 0; not read again in the visible lines. */
37 int32_t arg0_code = (( int32_t*)arg_type_ids)[0];
38 void* arg1 = (((
TVMValue*)args)[1].v_handle);
/* Type code stored for argument 1; not read again in the visible lines. */
39 int32_t arg1_code = (( int32_t*)arg_type_ids)[1];
/* Source buffer: the data pointer of the first array, viewed as float. */
40 float* placeholder = (
float*)(((
TVMArray*)arg0)[0].data);
42 int64_t* arg0_strides = (int64_t*)(((
TVMArray*)arg0)[0].strides);
/* Device type and id are read from the first array's ctx field only. */
43 int32_t dev_type = (((
TVMArray*)arg0)[0].ctx.device_type);
44 int32_t dev_id = (((
TVMArray*)arg0)[0].ctx.device_id);
/* Destination buffer: the data pointer of the second array, viewed as float. */
45 float* T_layout_trans = (
float*)(((
TVMArray*)arg1)[0].data);
46 int64_t* arg1_shape = (int64_t*)(((
TVMArray*)arg1)[0].shape);
47 int64_t* arg1_strides = (int64_t*)(((
TVMArray*)arg1)[0].strides);
/*
 * Entered only when the first array carries a non-NULL strides pointer.
 * What happens inside is not visible (listing line 49 is missing).
 */
48 if (!(arg0_strides ==
NULL)) {
/* Same check for the second array; its body is not visible either. */
50 if (!(arg1_strides ==
NULL)) {
/*
 * 2 x 8 x 8 iterations. The destination is written contiguously
 * (ax0_ax1_fused * 64 + ax2 * 8 + ax3), while the source is read with
 * ax0_ax1_fused as the fastest-varying term (ax2 * 16 + ax3 * 2 + ax0_ax1_fused).
 * NOTE(review): presumably this moves an innermost axis of extent 2 to the
 * outermost position -- confirm against the layouts used by the caller.
 */
52 for (int32_t ax0_ax1_fused = 0; ax0_ax1_fused < 2; ++ax0_ax1_fused) {
53 for (int32_t ax2 = 0; ax2 < 8; ++ax2) {
54 for (int32_t ax3 = 0; ax3 < 8; ++ax3) {
55 T_layout_trans[(((ax0_ax1_fused * 64) + (ax2 * 8)) + ax3)] = placeholder[(((ax2 * 16) + (ax3 * 2)) + ax0_ax1_fused)];
/*
 * NOTE(review): fragment of a rendered source listing. The leading numbers
 * (66, 67, ...) are the listing's own line numbers fused into the text; the
 * enclosing function header, the bodies of the three stride checks, the
 * declaration of `data_pad` and all closing braces are not visible here.
 *
 * Visible behaviour: three handles are unpacked from `args` and treated as
 * TVMArray pointers. The first array's data is copied into `data_pad` with a
 * one-element zero border, a 3 x 3 window over `data_pad` is combined with
 * float2 values read from the second array, and the results are stored into
 * the third array's data buffer.
 */
66 void* arg0 = (((
TVMValue*)args)[0].v_handle);
/* Type codes for the three arguments; not read again in the visible lines. */
67 int32_t arg0_code = (( int32_t*)arg_type_ids)[0];
68 void* arg1 = (((
TVMValue*)args)[1].v_handle);
69 int32_t arg1_code = (( int32_t*)arg_type_ids)[1];
70 void* arg2 = (((
TVMValue*)args)[2].v_handle);
71 int32_t arg2_code = (( int32_t*)arg_type_ids)[2];
/* Input buffer: the data pointer of the first array, viewed as float. */
72 float* placeholder = (
float*)(((
TVMArray*)arg0)[0].data);
74 int64_t* arg0_strides = (int64_t*)(((
TVMArray*)arg0)[0].strides);
/* Device type and id are read from the first array's ctx field only. */
75 int32_t dev_type = (((
TVMArray*)arg0)[0].ctx.device_type);
76 int32_t dev_id = (((
TVMArray*)arg0)[0].ctx.device_id);
/* Second operand: the data pointer of the second array, viewed as float. */
77 float* placeholder1 = (
float*)(((
TVMArray*)arg1)[0].data);
78 int64_t* arg1_shape = (int64_t*)(((
TVMArray*)arg1)[0].shape);
79 int64_t* arg1_strides = (int64_t*)(((
TVMArray*)arg1)[0].strides);
/* Output buffer: the data pointer of the third array, viewed as float. */
80 float* conv2d_NCHWc = (
float*)(((
TVMArray*)arg2)[0].data);
81 int64_t* arg2_shape = (int64_t*)(((
TVMArray*)arg2)[0].shape);
82 int64_t* arg2_strides = (int64_t*)(((
TVMArray*)arg2)[0].strides);
/*
 * Each of the next three branches is entered only when the corresponding
 * array carries a non-NULL strides pointer. Their bodies are not visible
 * (listing lines 84, 86 and 88-89 are missing).
 */
83 if (!(arg0_strides ==
NULL)) {
85 if (!(arg1_strides ==
NULL)) {
87 if (!(arg2_strides ==
NULL)) {
/*
 * Fill data_pad as a 10 x 10 grid (index row * 10 + col). Rows and columns
 * 1..8 take placeholder[(row - 1) * 8 + (col - 1)], which is what
 * (row * 8 + col - 9) evaluates to; every other cell is set to 0.0f.
 * NOTE(review): the declaration of data_pad is not visible in this fragment.
 */
90 for (int32_t i1_i2_fused = 0; i1_i2_fused < 10; ++i1_i2_fused) {
91 for (int32_t i3 = 0; i3 < 10; ++i3) {
92 data_pad[((i1_i2_fused * 10) + i3)] = (((((1 <= i1_i2_fused) && (i1_i2_fused < 9)) && (1 <= i3)) && (i3 < 9)) ? placeholder[(((i1_i2_fused * 8) + i3) - 9)] : 0.000000e+00f);
/*
 * Eight outer iterations; each one produces eight float2 results.
 * NOTE(review): float2 is not declared in the visible lines, and
 * `(float2)(x, y)` is OpenCL-style vector-literal syntax -- confirm that the
 * target compiler accepts it and that the float* to float2* casts below meet
 * its alignment requirements.
 */
95 for (int32_t n_oc_chunk_fused_oh_fused = 0; n_oc_chunk_fused_oh_fused < 8; ++n_oc_chunk_fused_oh_fused) {
96 float2 conv2d_NCHWc_global[8];
/* Reset the eight accumulators before summing over the window. */
97 conv2d_NCHWc_global[0] = ((
float2)(0.000000e+00f, 0.000000e+00f));
98 conv2d_NCHWc_global[1] = ((
float2)(0.000000e+00f, 0.000000e+00f));
99 conv2d_NCHWc_global[2] = ((
float2)(0.000000e+00f, 0.000000e+00f));
100 conv2d_NCHWc_global[3] = ((
float2)(0.000000e+00f, 0.000000e+00f));
101 conv2d_NCHWc_global[4] = ((
float2)(0.000000e+00f, 0.000000e+00f));
102 conv2d_NCHWc_global[5] = ((
float2)(0.000000e+00f, 0.000000e+00f));
103 conv2d_NCHWc_global[6] = ((
float2)(0.000000e+00f, 0.000000e+00f));
104 conv2d_NCHWc_global[7] = ((
float2)(0.000000e+00f, 0.000000e+00f));
/*
 * 3 x 3 window. Accumulator j (0..7) adds
 *   data_pad[(kh + n_oc_chunk_fused_oh_fused) * 10 + kw + j]
 * (the same scalar in both float2 lanes) multiplied by the float2 found at
 * placeholder1 + kh * 6 + kw * 2. The eight statements differ only in j.
 */
105 for (int32_t kh = 0; kh < 3; ++kh) {
106 for (int32_t kw = 0; kw < 3; ++kw) {
107 conv2d_NCHWc_global[0] = (conv2d_NCHWc_global[0] + (((
float2)(data_pad[(((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw)], data_pad[(((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw)])) * ((
float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
108 conv2d_NCHWc_global[1] = (conv2d_NCHWc_global[1] + (((
float2)(data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 1)], data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 1)])) * ((
float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
109 conv2d_NCHWc_global[2] = (conv2d_NCHWc_global[2] + (((
float2)(data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 2)], data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 2)])) * ((
float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
110 conv2d_NCHWc_global[3] = (conv2d_NCHWc_global[3] + (((
float2)(data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 3)], data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 3)])) * ((
float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
111 conv2d_NCHWc_global[4] = (conv2d_NCHWc_global[4] + (((
float2)(data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 4)], data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 4)])) * ((
float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
112 conv2d_NCHWc_global[5] = (conv2d_NCHWc_global[5] + (((
float2)(data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 5)], data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 5)])) * ((
float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
113 conv2d_NCHWc_global[6] = (conv2d_NCHWc_global[6] + (((
float2)(data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 6)], data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 6)])) * ((
float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
114 conv2d_NCHWc_global[7] = (conv2d_NCHWc_global[7] + (((
float2)(data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 7)], data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 7)])) * ((
float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
/*
 * Write the eight accumulators to the output: float2 number ow_inner goes to
 * float offset n_oc_chunk_fused_oh_fused * 16 + ow_inner * 2.
 */
117 for (int32_t ow_inner = 0; ow_inner < 8; ++ow_inner) {
118 ((
float2*)(conv2d_NCHWc + ((n_oc_chunk_fused_oh_fused * 16) + (ow_inner * 2))))[0] = ((
float2*)((
float*)conv2d_NCHWc_global + (ow_inner * 2)))[0];
TVM_DLL int32_t fused_nn_contrib_conv2d_NCHWc(void *args, void *arg_type_ids, int32_t num_args)
Union type of values being passed through API and function calls.
TVM_DLL int32_t fused_layout_transform_1(void *args, void *arg_type_ids, int32_t num_args)
Plain C Tensor object, does not manage memory.
TVM_DLL int32_t fused_layout_transform_2(void *args, void *arg_type_ids, int32_t num_args)