PandA-2024.02
gemm.c
Go to the documentation of this file.
1 /*
2 Implementation based on algorithm described in:
3 The cache performance and optimizations of blocked algorithms
4 M. D. Lam, E. E. Rothberg, and M. E. Wolf
5 ASPLOS 1991
6 */
7 
8 #include "gemm.h"
9 
10 void bbgemm(TYPE m1[N], TYPE m2[N], TYPE prod[N]){
11  int i, k, j, jj, kk;
12  int i_row, k_row;
13  TYPE temp_x, mul;
14 
15  loopjj:for (jj = 0; jj < row_size; jj += block_size){
16  loopkk:for (kk = 0; kk < row_size; kk += block_size){
17  loopi:for ( i = 0; i < row_size; ++i){
18  loopk:for (k = 0; k < block_size; ++k){
19  i_row = i * row_size;
20  k_row = (k + kk) * row_size;
21  temp_x = m1[i_row + k + kk];
22  loopj:for (j = 0; j < block_size; ++j){
23  mul = temp_x * m2[k_row + j + jj];
24  prod[i_row + j + jj] += mul;
25  }
26  }
27  }
28  }
29  }
30 }
void bbgemm(TYPE m1[N], TYPE m2[N], TYPE prod[N])
Definition: gemm.c:10
#define TYPE
Definition: backprop.h:21
#define row_size
Definition: gemm.h:16
static const uint32_t k[]
Definition: sha-256.c:22
#define block_size
Definition: gemm.h:19
const unsigned long long m2
Definition: digitrec_sw.c:13
#define N
Definition: dfdiv.c:60
const unsigned long long m1
Definition: digitrec_sw.c:12

Generated on Mon Feb 12 2024 13:02:49 for PandA-2024.02 by doxygen 1.8.13