PandA-2024.02
11_conv2d_b.cc
Go to the documentation of this file.
1 #include "tvm/runtime/c_runtime_api.h"
2 #include "tvm/runtime/c_backend_api.h"
3 TVM_DLL int32_t fused_layout_transform_2( void* args, void* arg_type_ids, int32_t num_args) {
4  void* arg0 = (((TVMValue*)args)[0].v_handle);
5  void* arg1 = (((TVMValue*)args)[1].v_handle);
6  float* placeholder = (float*)(((TVMArray*)arg0)[0].data);
7  float* T_layout_trans = (float*)(((TVMArray*)arg1)[0].data);
8  for (int32_t ax0_ax1_fused_ax2_fused = 0; ax0_ax1_fused_ax2_fused < 64; ++ax0_ax1_fused_ax2_fused) {
9  for (int32_t ax3 = 0; ax3 < 64; ++ax3) {
10  T_layout_trans[((ax0_ax1_fused_ax2_fused * 64) + ax3)] = placeholder[((ax0_ax1_fused_ax2_fused * 64) + ax3)];
11  }
12  }
13  return 0;
14 }
15 
16 TVM_DLL int32_t fused_layout_transform_1( void* args, void* arg_type_ids, int32_t num_args) {
17  void* arg0 = (((TVMValue*)args)[0].v_handle);
18  void* arg1 = (((TVMValue*)args)[1].v_handle);
19  float* placeholder = (float*)(((TVMArray*)arg0)[0].data);
20  float* T_layout_trans = (float*)(((TVMArray*)arg1)[0].data);
21  for (int32_t ax0_ax1_fused = 0; ax0_ax1_fused < 2; ++ax0_ax1_fused) {
22  for (int32_t ax2 = 0; ax2 < 64; ++ax2) {
23  for (int32_t ax3 = 0; ax3 < 64; ++ax3) {
24  T_layout_trans[(((ax0_ax1_fused * 4096) + (ax2 * 64)) + ax3)] = placeholder[(((ax2 * 128) + (ax3 * 2)) + ax0_ax1_fused)];
25  }
26  }
27  }
28  return 0;
29 }
30 
31 #ifdef __cplusplus
32 extern "C"
33 #endif
34 TVM_DLL int32_t fused_nn_contrib_conv2d_NCHWc( void* args, void* arg_type_ids, int32_t num_args) {
35  void* arg0 = (((TVMValue*)args)[0].v_handle);
36  int32_t arg0_code = (( int32_t*)arg_type_ids)[0];
37  void* arg1 = (((TVMValue*)args)[1].v_handle);
38  int32_t arg1_code = (( int32_t*)arg_type_ids)[1];
39  void* arg2 = (((TVMValue*)args)[2].v_handle);
40  int32_t arg2_code = (( int32_t*)arg_type_ids)[2];
41  float* placeholder = (float*)(((TVMArray*)arg0)[0].data);
42  int64_t* arg0_shape = (int64_t*)(((TVMArray*)arg0)[0].shape);
43  int64_t* arg0_strides = (int64_t*)(((TVMArray*)arg0)[0].strides);
44  int32_t dev_type = (((TVMArray*)arg0)[0].ctx.device_type);
45  int32_t dev_id = (((TVMArray*)arg0)[0].ctx.device_id);
46  float* placeholder1 = (float*)(((TVMArray*)arg1)[0].data);
47  int64_t* arg1_shape = (int64_t*)(((TVMArray*)arg1)[0].shape);
48  int64_t* arg1_strides = (int64_t*)(((TVMArray*)arg1)[0].strides);
49  float* conv2d_NCHWc = (float*)(((TVMArray*)arg2)[0].data);
50  int64_t* arg2_shape = (int64_t*)(((TVMArray*)arg2)[0].shape);
51  int64_t* arg2_strides = (int64_t*)(((TVMArray*)arg2)[0].strides);
52  if (!(arg0_strides == NULL)) {
53  }
54  if (!(arg1_strides == NULL)) {
55  }
56  if (!(arg2_strides == NULL)) {
57  }
58  void* data_pad = TVMBackendAllocWorkspace(1, dev_id, (uint64_t)17424, 2, 32);
59  if (data_pad == NULL) {
60  return -1;
61  }
62  for (int32_t i1_i2_fused = 0; i1_i2_fused < 66; ++i1_i2_fused) {
63  for (int32_t i3 = 0; i3 < 66; ++i3) {
64  (( float*)data_pad)[((i1_i2_fused * 66) + i3)] = (((((1 <= i1_i2_fused) && (i1_i2_fused < 65)) && (1 <= i3)) && (i3 < 65)) ? placeholder[(((i1_i2_fused * 64) + i3) - 65)] : 0.000000e+00f);
65  }
66  }
67  for (int32_t n_oc_chunk_fused_oh_fused = 0; n_oc_chunk_fused_oh_fused < 64; ++n_oc_chunk_fused_oh_fused) {
68  float2 conv2d_NCHWc_global[16];
69  for (int32_t ow_outer = 0; ow_outer < 4; ++ow_outer) {
70  conv2d_NCHWc_global[0] = ((float2)(0.000000e+00f, 0.000000e+00f));
71  conv2d_NCHWc_global[1] = ((float2)(0.000000e+00f, 0.000000e+00f));
72  conv2d_NCHWc_global[2] = ((float2)(0.000000e+00f, 0.000000e+00f));
73  conv2d_NCHWc_global[3] = ((float2)(0.000000e+00f, 0.000000e+00f));
74  conv2d_NCHWc_global[4] = ((float2)(0.000000e+00f, 0.000000e+00f));
75  conv2d_NCHWc_global[5] = ((float2)(0.000000e+00f, 0.000000e+00f));
76  conv2d_NCHWc_global[6] = ((float2)(0.000000e+00f, 0.000000e+00f));
77  conv2d_NCHWc_global[7] = ((float2)(0.000000e+00f, 0.000000e+00f));
78  conv2d_NCHWc_global[8] = ((float2)(0.000000e+00f, 0.000000e+00f));
79  conv2d_NCHWc_global[9] = ((float2)(0.000000e+00f, 0.000000e+00f));
80  conv2d_NCHWc_global[10] = ((float2)(0.000000e+00f, 0.000000e+00f));
81  conv2d_NCHWc_global[11] = ((float2)(0.000000e+00f, 0.000000e+00f));
82  conv2d_NCHWc_global[12] = ((float2)(0.000000e+00f, 0.000000e+00f));
83  conv2d_NCHWc_global[13] = ((float2)(0.000000e+00f, 0.000000e+00f));
84  conv2d_NCHWc_global[14] = ((float2)(0.000000e+00f, 0.000000e+00f));
85  conv2d_NCHWc_global[15] = ((float2)(0.000000e+00f, 0.000000e+00f));
86  for (int32_t kh = 0; kh < 3; ++kh) {
87  for (int32_t kw = 0; kw < 3; ++kw) {
88  conv2d_NCHWc_global[0] = (conv2d_NCHWc_global[0] + (((float2)((( float*)data_pad)[((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw)], (( float*)data_pad)[((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
89  conv2d_NCHWc_global[1] = (conv2d_NCHWc_global[1] + (((float2)((( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 1)], (( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 1)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
90  conv2d_NCHWc_global[2] = (conv2d_NCHWc_global[2] + (((float2)((( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 2)], (( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 2)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
91  conv2d_NCHWc_global[3] = (conv2d_NCHWc_global[3] + (((float2)((( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 3)], (( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 3)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
92  conv2d_NCHWc_global[4] = (conv2d_NCHWc_global[4] + (((float2)((( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 4)], (( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 4)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
93  conv2d_NCHWc_global[5] = (conv2d_NCHWc_global[5] + (((float2)((( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 5)], (( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 5)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
94  conv2d_NCHWc_global[6] = (conv2d_NCHWc_global[6] + (((float2)((( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 6)], (( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 6)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
95  conv2d_NCHWc_global[7] = (conv2d_NCHWc_global[7] + (((float2)((( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 7)], (( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 7)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
96  conv2d_NCHWc_global[8] = (conv2d_NCHWc_global[8] + (((float2)((( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 8)], (( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 8)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
97  conv2d_NCHWc_global[9] = (conv2d_NCHWc_global[9] + (((float2)((( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 9)], (( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 9)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
98  conv2d_NCHWc_global[10] = (conv2d_NCHWc_global[10] + (((float2)((( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 10)], (( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 10)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
99  conv2d_NCHWc_global[11] = (conv2d_NCHWc_global[11] + (((float2)((( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 11)], (( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 11)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
100  conv2d_NCHWc_global[12] = (conv2d_NCHWc_global[12] + (((float2)((( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 12)], (( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 12)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
101  conv2d_NCHWc_global[13] = (conv2d_NCHWc_global[13] + (((float2)((( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 13)], (( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 13)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
102  conv2d_NCHWc_global[14] = (conv2d_NCHWc_global[14] + (((float2)((( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 14)], (( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 14)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
103  conv2d_NCHWc_global[15] = (conv2d_NCHWc_global[15] + (((float2)((( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 15)], (( float*)data_pad)[(((((kh * 66) + (n_oc_chunk_fused_oh_fused * 66)) + (ow_outer * 16)) + kw) + 15)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
104  }
105  }
106  for (int32_t ow_inner = 0; ow_inner < 16; ++ow_inner) {
107  (( float2*)(conv2d_NCHWc + (((n_oc_chunk_fused_oh_fused * 128) + (ow_outer * 32)) + (ow_inner * 2))))[0] = (( float2*)(( float*)conv2d_NCHWc_global + (ow_inner * 2)))[0];
108  }
109  }
110  }
111  if (TVMBackendFreeWorkspace(1, dev_id, data_pad) != 0) {
112  return -1;
113  }
114  return 0;
115 }
116 
#define NULL
TVM_DLL int32_t fused_nn_contrib_conv2d_NCHWc(void *args, void *arg_type_ids, int32_t num_args)
Definition: 11_conv2d_b.cc:34
float float2
Union type of values being passed through API and function calls.
TVM_DLL int TVMBackendFreeWorkspace(int device_type, int device_id, void *ptr)
Backend function to free temporary workspace.
TVM_DLL int32_t fused_layout_transform_1(void *args, void *arg_type_ids, int32_t num_args)
Definition: 11_conv2d_b.cc:16
#define TVM_DLL
Definition: c_runtime_api.h:59
TVM_DLL void * TVMBackendAllocWorkspace(int device_type, int device_id, uint64_t nbytes, int dtype_code_hint, int dtype_bits_hint)
Backend function to allocate temporary workspace.
Plain C Tensor object, does not manage memory.
Definition: dlpack.h:111
TVM_DLL int32_t fused_layout_transform_2(void *args, void *arg_type_ids, int32_t num_args)
Definition: 11_conv2d_b.cc:3

Generated on Mon Feb 12 2024 13:02:50 for PandA-2024.02 by doxygen 1.8.13