PandA-2024.02
09_conv2d_a.cc
Go to the documentation of this file.
1 #include "tvm/runtime/c_runtime_api.h"
2 #include "tvm/runtime/c_backend_api.h"
/*
 * Module context handle, initialised to NULL.
 * Presumably filled in by the TVM runtime when the module is loaded -- confirm.
 *
 * Defined without `extern`: the initialiser already makes this a definition
 * with external linkage, and pairing an initialiser with `extern` only draws
 * a compiler warning.
 */
void* __tvm_module_ctx = NULL;
4 #ifdef __cplusplus
5 extern "C"
6 #endif
7 TVM_DLL int32_t fused_layout_transform_1( void* args, void* arg_type_ids, int32_t num_args) {
8  void* arg0 = (((TVMValue*)args)[0].v_handle);
9  int32_t arg0_code = (( int32_t*)arg_type_ids)[0];
10  void* arg1 = (((TVMValue*)args)[1].v_handle);
11  int32_t arg1_code = (( int32_t*)arg_type_ids)[1];
12  float* placeholder = (float*)(((TVMArray*)arg0)[0].data);
13  int64_t* arg0_shape = (int64_t*)(((TVMArray*)arg0)[0].shape);
14  int64_t* arg0_strides = (int64_t*)(((TVMArray*)arg0)[0].strides);
15  int32_t dev_type = (((TVMArray*)arg0)[0].ctx.device_type);
16  int32_t dev_id = (((TVMArray*)arg0)[0].ctx.device_id);
17  float* T_layout_trans = (float*)(((TVMArray*)arg1)[0].data);
18  int64_t* arg1_shape = (int64_t*)(((TVMArray*)arg1)[0].shape);
19  int64_t* arg1_strides = (int64_t*)(((TVMArray*)arg1)[0].strides);
20  if (!(arg0_strides == NULL)) {
21  }
22  if (!(arg1_strides == NULL)) {
23  }
24  for (int32_t ax0_ax1_fused = 0; ax0_ax1_fused < 2; ++ax0_ax1_fused) {
25  for (int32_t ax2 = 0; ax2 < 8; ++ax2) {
26  for (int32_t ax3 = 0; ax3 < 8; ++ax3) {
27  T_layout_trans[(((ax0_ax1_fused * 64) + (ax2 * 8)) + ax3)] = placeholder[(((ax2 * 16) + (ax3 * 2)) + ax0_ax1_fused)];
28  }
29  }
30  }
31  return 0;
32 }
33 
34 #ifdef __cplusplus
35 extern "C"
36 #endif
37 TVM_DLL int32_t fused_layout_transform_2( void* args, void* arg_type_ids, int32_t num_args) {
38  void* arg0 = (((TVMValue*)args)[0].v_handle);
39  int32_t arg0_code = (( int32_t*)arg_type_ids)[0];
40  void* arg1 = (((TVMValue*)args)[1].v_handle);
41  int32_t arg1_code = (( int32_t*)arg_type_ids)[1];
42  float* placeholder = (float*)(((TVMArray*)arg0)[0].data);
43  int64_t* arg0_shape = (int64_t*)(((TVMArray*)arg0)[0].shape);
44  int64_t* arg0_strides = (int64_t*)(((TVMArray*)arg0)[0].strides);
45  int32_t dev_type = (((TVMArray*)arg0)[0].ctx.device_type);
46  int32_t dev_id = (((TVMArray*)arg0)[0].ctx.device_id);
47  float* T_layout_trans = (float*)(((TVMArray*)arg1)[0].data);
48  int64_t* arg1_shape = (int64_t*)(((TVMArray*)arg1)[0].shape);
49  int64_t* arg1_strides = (int64_t*)(((TVMArray*)arg1)[0].strides);
50  if (!(arg0_strides == NULL)) {
51  }
52  if (!(arg1_strides == NULL)) {
53  }
54  for (int32_t ax0_ax1_fused_ax2_fused = 0; ax0_ax1_fused_ax2_fused < 8; ++ax0_ax1_fused_ax2_fused) {
55  for (int32_t ax3 = 0; ax3 < 8; ++ax3) {
56  T_layout_trans[((ax0_ax1_fused_ax2_fused * 8) + ax3)] = placeholder[((ax0_ax1_fused_ax2_fused * 8) + ax3)];
57  }
58  }
59  return 0;
60 }
61 
62 #ifdef __cplusplus
63 extern "C"
64 #endif
65 TVM_DLL int32_t fused_nn_contrib_conv2d_NCHWc( void* args, void* arg_type_ids, int32_t num_args) {
66  void* arg0 = (((TVMValue*)args)[0].v_handle);
67  int32_t arg0_code = (( int32_t*)arg_type_ids)[0];
68  void* arg1 = (((TVMValue*)args)[1].v_handle);
69  int32_t arg1_code = (( int32_t*)arg_type_ids)[1];
70  void* arg2 = (((TVMValue*)args)[2].v_handle);
71  int32_t arg2_code = (( int32_t*)arg_type_ids)[2];
72  float* placeholder = (float*)(((TVMArray*)arg0)[0].data);
73  int64_t* arg0_shape = (int64_t*)(((TVMArray*)arg0)[0].shape);
74  int64_t* arg0_strides = (int64_t*)(((TVMArray*)arg0)[0].strides);
75  int32_t dev_type = (((TVMArray*)arg0)[0].ctx.device_type);
76  int32_t dev_id = (((TVMArray*)arg0)[0].ctx.device_id);
77  float* placeholder1 = (float*)(((TVMArray*)arg1)[0].data);
78  int64_t* arg1_shape = (int64_t*)(((TVMArray*)arg1)[0].shape);
79  int64_t* arg1_strides = (int64_t*)(((TVMArray*)arg1)[0].strides);
80  float* conv2d_NCHWc = (float*)(((TVMArray*)arg2)[0].data);
81  int64_t* arg2_shape = (int64_t*)(((TVMArray*)arg2)[0].shape);
82  int64_t* arg2_strides = (int64_t*)(((TVMArray*)arg2)[0].strides);
83  if (!(arg0_strides == NULL)) {
84  }
85  if (!(arg1_strides == NULL)) {
86  }
87  if (!(arg2_strides == NULL)) {
88  }
89  float data_pad[100];
90  for (int32_t i1_i2_fused = 0; i1_i2_fused < 10; ++i1_i2_fused) {
91  for (int32_t i3 = 0; i3 < 10; ++i3) {
92  data_pad[((i1_i2_fused * 10) + i3)] = (((((1 <= i1_i2_fused) && (i1_i2_fused < 9)) && (1 <= i3)) && (i3 < 9)) ? placeholder[(((i1_i2_fused * 8) + i3) - 9)] : 0.000000e+00f);
93  }
94  }
95  for (int32_t n_oc_chunk_fused_oh_fused = 0; n_oc_chunk_fused_oh_fused < 8; ++n_oc_chunk_fused_oh_fused) {
96  float2 conv2d_NCHWc_global[8];
97  conv2d_NCHWc_global[0] = ((float2)(0.000000e+00f, 0.000000e+00f));
98  conv2d_NCHWc_global[1] = ((float2)(0.000000e+00f, 0.000000e+00f));
99  conv2d_NCHWc_global[2] = ((float2)(0.000000e+00f, 0.000000e+00f));
100  conv2d_NCHWc_global[3] = ((float2)(0.000000e+00f, 0.000000e+00f));
101  conv2d_NCHWc_global[4] = ((float2)(0.000000e+00f, 0.000000e+00f));
102  conv2d_NCHWc_global[5] = ((float2)(0.000000e+00f, 0.000000e+00f));
103  conv2d_NCHWc_global[6] = ((float2)(0.000000e+00f, 0.000000e+00f));
104  conv2d_NCHWc_global[7] = ((float2)(0.000000e+00f, 0.000000e+00f));
105  for (int32_t kh = 0; kh < 3; ++kh) {
106  for (int32_t kw = 0; kw < 3; ++kw) {
107  conv2d_NCHWc_global[0] = (conv2d_NCHWc_global[0] + (((float2)(data_pad[(((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw)], data_pad[(((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
108  conv2d_NCHWc_global[1] = (conv2d_NCHWc_global[1] + (((float2)(data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 1)], data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 1)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
109  conv2d_NCHWc_global[2] = (conv2d_NCHWc_global[2] + (((float2)(data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 2)], data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 2)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
110  conv2d_NCHWc_global[3] = (conv2d_NCHWc_global[3] + (((float2)(data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 3)], data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 3)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
111  conv2d_NCHWc_global[4] = (conv2d_NCHWc_global[4] + (((float2)(data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 4)], data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 4)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
112  conv2d_NCHWc_global[5] = (conv2d_NCHWc_global[5] + (((float2)(data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 5)], data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 5)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
113  conv2d_NCHWc_global[6] = (conv2d_NCHWc_global[6] + (((float2)(data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 6)], data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 6)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
114  conv2d_NCHWc_global[7] = (conv2d_NCHWc_global[7] + (((float2)(data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 7)], data_pad[((((kh * 10) + (n_oc_chunk_fused_oh_fused * 10)) + kw) + 7)])) * (( float2*)(placeholder1 + ((kh * 6) + (kw * 2))))[0]));
115  }
116  }
117  for (int32_t ow_inner = 0; ow_inner < 8; ++ow_inner) {
118  (( float2*)(conv2d_NCHWc + ((n_oc_chunk_fused_oh_fused * 16) + (ow_inner * 2))))[0] = (( float2*)(( float*)conv2d_NCHWc_global + (ow_inner * 2)))[0];
119  }
120  }
121  return 0;
122 }
123 
#define NULL
TVM_DLL int32_t fused_layout_transform_1(void *args, void *arg_type_ids, int32_t num_args)
Definition: 09_conv2d_a.cc:7
float float2
void * __tvm_module_ctx
Union type of values being passed through API and function calls.
TVM_DLL int32_t fused_layout_transform_2(void *args, void *arg_type_ids, int32_t num_args)
Definition: 09_conv2d_a.cc:37
TVM_DLL int32_t fused_nn_contrib_conv2d_NCHWc(void *args, void *arg_type_ids, int32_t num_args)
Definition: 09_conv2d_a.cc:65
#define TVM_DLL
Definition: c_runtime_api.h:59
Plain C Tensor object, does not manage memory.
Definition: dlpack.h:111

Generated on Mon Feb 12 2024 13:02:50 for PandA-2024.02 by doxygen 1.8.13