PandA-2024.02
chenidct.c
Go to the documentation of this file.
1 /*
2 +--------------------------------------------------------------------------+
3 | CHStone : A suite of Benchmark Programs for C-based High-Level Synthesis |
4 | ======================================================================== |
5 | |
6 | * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
7 | H. Takada and K. Ishii |
8 | Nagoya University, Japan |
9 | |
10 | * Remarks : |
11 | 1. This source code is reformatted to follow CHStone's style. |
12 | 2. Test vectors are added for CHStone. |
13 | 3. If "main_result" is 0 at the end of the program, the program is |
14 | successfully executed. |
15 | 4. Follow the copyright of each benchmark program. |
16 +--------------------------------------------------------------------------+
17 */
18 /*
19  * IDCT transformation of Chen algorithm
20  *
21  * @(#) $Id: chenidct.c,v 1.2 2003/07/18 10:19:21 honda Exp $
22  */
23 /*************************************************************
24 Copyright (C) 1990, 1991, 1993 Andy C. Hung, all rights reserved.
25 PUBLIC DOMAIN LICENSE: Stanford University Portable Video Research
26 Group. If you use this software, you agree to the following: This
27 program package is purely experimental, and is licensed "as is".
28 Permission is granted to use, modify, and distribute this program
29 without charge for any purpose, provided this license/ disclaimer
30 notice appears in the copies. No warranty or maintenance is given,
31 either expressed or implied. In no event shall the author(s) be
32 liable to you or a third party for any special, incidental,
33 consequential, or other damages, arising out of the use or inability
34 to use the program for any purpose (or the loss of data), even if we
35 have been advised of such possibilities. Any public reference or
36 advertisement of this source code should refer to it as the Portable
37 Video Research Group (PVRG) code, and not by any author(s) (or
38 Stanford University) name.
39 *************************************************************/
40 /*
41 ************************************************************
42 chendct.c
43 
44 A simple DCT algorithm that seems to have fairly nice arithmetic
45 properties.
46 
47 W. H. Chen, C. H. Smith and S. C. Fralick "A fast computational
48 algorithm for the discrete cosine transform," IEEE Trans. Commun.,
49 vol. COM-25, pp. 1004-1009, Sept 1977.
50 
51 ************************************************************
52 */
53 
54 #define LS(r,s) ((r) << (s))
55 #define RS(r,s) ((r) >> (s)) /* Caution with rounding... */
56 
57 #define MSCALE(expr) RS((expr),9)
58 
59 /* Cos constants */
60 
61 #define c1d4 362L
62 
63 #define c1d8 473L
64 #define c3d8 196L
65 
66 #define c1d16 502L
67 #define c3d16 426L
68 #define c5d16 284L
69 #define c7d16 100L
70 
71 
72 /*
73  *
74  * ChenIDCT() implements the Chen inverse dct. Note that there are two
75  * input vectors that represent x=input, and y=output, and must be
76  * defined (and storage allocated) before this routine is called.
77  */
78 void
79 ChenIDct (int *x, int *y)
80 {
81  register int i;
82  register int *aptr;
83  register int a0, a1, a2, a3;
84  register int b0, b1, b2, b3;
85  register int c0, c1, c2, c3;
86 
87  /* Loop over columns */
88 
89  for (i = 0; i < 8; i++)
90  {
91  aptr = x + i;
92  b0 = LS (*aptr, 2);
93  aptr += 8;
94  a0 = LS (*aptr, 2);
95  aptr += 8;
96  b2 = LS (*aptr, 2);
97  aptr += 8;
98  a1 = LS (*aptr, 2);
99  aptr += 8;
100  b1 = LS (*aptr, 2);
101  aptr += 8;
102  a2 = LS (*aptr, 2);
103  aptr += 8;
104  b3 = LS (*aptr, 2);
105  aptr += 8;
106  a3 = LS (*aptr, 2);
107 
108  /* Split into even mode b0 = x0 b1 = x4 b2 = x2 b3 = x6.
109  And the odd terms a0 = x1 a1 = x3 a2 = x5 a3 = x7.
110  */
111 
112  c0 = MSCALE ((c7d16 * a0) - (c1d16 * a3));
113  c1 = MSCALE ((c3d16 * a2) - (c5d16 * a1));
114  c2 = MSCALE ((c3d16 * a1) + (c5d16 * a2));
115  c3 = MSCALE ((c1d16 * a0) + (c7d16 * a3));
116 
117  /* First Butterfly on even terms. */
118 
119  a0 = MSCALE (c1d4 * (b0 + b1));
120  a1 = MSCALE (c1d4 * (b0 - b1));
121 
122  a2 = MSCALE ((c3d8 * b2) - (c1d8 * b3));
123  a3 = MSCALE ((c1d8 * b2) + (c3d8 * b3));
124 
125  b0 = a0 + a3;
126  b1 = a1 + a2;
127  b2 = a1 - a2;
128  b3 = a0 - a3;
129 
130  /* Second Butterfly */
131 
132  a0 = c0 + c1;
133  a1 = c0 - c1;
134  a2 = c3 - c2;
135  a3 = c3 + c2;
136 
137  c0 = a0;
138  c1 = MSCALE (c1d4 * (a2 - a1));
139  c2 = MSCALE (c1d4 * (a2 + a1));
140  c3 = a3;
141 
142  aptr = y + i;
143  *aptr = b0 + c3;
144  aptr += 8;
145  *aptr = b1 + c2;
146  aptr += 8;
147  *aptr = b2 + c1;
148  aptr += 8;
149  *aptr = b3 + c0;
150  aptr += 8;
151  *aptr = b3 - c0;
152  aptr += 8;
153  *aptr = b2 - c1;
154  aptr += 8;
155  *aptr = b1 - c2;
156  aptr += 8;
157  *aptr = b0 - c3;
158  }
159 
160  /* Loop over rows */
161 
162  for (i = 0; i < 8; i++)
163  {
164  aptr = y + LS (i, 3);
165  b0 = *(aptr++);
166  a0 = *(aptr++);
167  b2 = *(aptr++);
168  a1 = *(aptr++);
169  b1 = *(aptr++);
170  a2 = *(aptr++);
171  b3 = *(aptr++);
172  a3 = *(aptr);
173 
174  /*
175  Split into even mode b0 = x0 b1 = x4 b2 = x2 b3 = x6.
176  And the odd terms a0 = x1 a1 = x3 a2 = x5 a3 = x7.
177  */
178 
179  c0 = MSCALE ((c7d16 * a0) - (c1d16 * a3));
180  c1 = MSCALE ((c3d16 * a2) - (c5d16 * a1));
181  c2 = MSCALE ((c3d16 * a1) + (c5d16 * a2));
182  c3 = MSCALE ((c1d16 * a0) + (c7d16 * a3));
183 
184  /* First Butterfly on even terms. */
185 
186  a0 = MSCALE (c1d4 * (b0 + b1));
187  a1 = MSCALE (c1d4 * (b0 - b1));
188 
189  a2 = MSCALE ((c3d8 * b2) - (c1d8 * b3));
190  a3 = MSCALE ((c1d8 * b2) + (c3d8 * b3));
191 
192  /* Calculate last set of b's */
193 
194  b0 = a0 + a3;
195  b1 = a1 + a2;
196  b2 = a1 - a2;
197  b3 = a0 - a3;
198 
199  /* Second Butterfly */
200 
201  a0 = c0 + c1;
202  a1 = c0 - c1;
203  a2 = c3 - c2;
204  a3 = c3 + c2;
205 
206  c0 = a0;
207  c1 = MSCALE (c1d4 * (a2 - a1));
208  c2 = MSCALE (c1d4 * (a2 + a1));
209  c3 = a3;
210 
211  aptr = y + LS (i, 3);
212  *(aptr++) = b0 + c3;
213  *(aptr++) = b1 + c2;
214  *(aptr++) = b2 + c1;
215  *(aptr++) = b3 + c0;
216  *(aptr++) = b3 - c0;
217  *(aptr++) = b2 - c1;
218  *(aptr++) = b1 - c2;
219  *(aptr) = b0 - c3;
220  }
221 
222  /*
223  Retrieve correct accuracy. We have additional factor
224  of 16 that must be removed.
225  */
226 
227  for (i = 0, aptr = y; i < 64; i++, aptr++)
228  *aptr = (((*aptr < 0) ? (*aptr - 8) : (*aptr + 8)) / 16);
229 }
230 
231  /*END*/
TVMArray c2[1]
TVMArray c1[1]
void ChenIDct(int *x, int *y)
Definition: chenidct.c:79
#define c1d16
Definition: chenidct.c:66
TVMArray b0[1]
#define c1d4
Definition: chenidct.c:61
#define LS(r, s)
Definition: chenidct.c:54
#define c3d8
Definition: chenidct.c:64
#define MSCALE(expr)
Definition: chenidct.c:57
TVMArray a3[1]
#define c3d16
Definition: chenidct.c:67
#define c5d16
Definition: chenidct.c:68
TVMArray a0[1]
TVMArray b2[1]
#define c7d16
Definition: chenidct.c:69
TVMArray a2[1]
TVMArray a1[1]
TVMArray b1[1]
#define c1d8
Definition: chenidct.c:63
x
Return the smallest n such that 2^n >= _x.
TVMArray c0[1]

Generated on Mon Feb 12 2024 13:02:49 for PandA-2024.02 by doxygen 1.8.13