PandA-2024.02
softfloat.c
Go to the documentation of this file.
1 /*
2 +--------------------------------------------------------------------------+
3 | CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
4 | ======================================================================== |
5 | |
6 | * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
7 | H. Takada and K. Ishii |
8 | Nagoya University, Japan |
9 | |
10 | * Remark : |
11 | 1. This source code is modified to unify the formats of the benchmark |
12 | programs in CHStone. |
13 | 2. Test vectors are added for CHStone. |
14 | 3. If "main_result" is 0 at the end of the program, the program is |
15 | correctly executed. |
16 | 4. Please follow the copyright of each benchmark program. |
17 +--------------------------------------------------------------------------+
18 */
19 /*============================================================================
20 
21 This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
22 Package, Release 2b.
23 
24 Written by John R. Hauser. This work was made possible in part by the
25 International Computer Science Institute, located at Suite 600, 1947 Center
26 Street, Berkeley, California 94704. Funding was partially provided by the
27 National Science Foundation under grant MIP-9311980. The original version
28 of this code was written as part of a project to build a fixed-point vector
29 processor in collaboration with the University of California at Berkeley,
30 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
31 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
32 arithmetic/SoftFloat.html'.
33 
34 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
35 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
36 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
37 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
38 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
39 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
40 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
41 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
42 
43 Derivative works are acceptable, even for commercial purposes, so long as
44 (1) the source code for the derivative work includes prominent notice that
45 the work is derivative, and (2) the source code includes prominent notice with
46 these four paragraphs for those parts of this code that are retained.
47 
48 =============================================================================*/
49 
50 #include "milieu.h"
51 #include "softfloat.h"
52 
53 /*----------------------------------------------------------------------------
54 | Floating-point rounding mode, extended double-precision rounding precision,
55 | and exception flags.
56 *----------------------------------------------------------------------------*/
59 
60 /*----------------------------------------------------------------------------
61 | Primitive arithmetic functions, including multi-word arithmetic, and
62 | division and square root approximations. (Can be specialized to target if
63 | desired.)
64 *----------------------------------------------------------------------------*/
65 #include "softfloat-macros"
66 
67 /*----------------------------------------------------------------------------
68 | Functions and definitions to determine: (1) whether tininess for underflow
69 | is detected before or after rounding by default, (2) what (if anything)
70 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
71 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
72 | are propagated from function inputs to output. These details are target-
73 | specific.
74 *----------------------------------------------------------------------------*/
75 #include "softfloat-specialize"
76 
77 /*----------------------------------------------------------------------------
78 | Returns the fraction bits of the double-precision floating-point value `a'.
79 *----------------------------------------------------------------------------*/
80 
83 {
84 
85  return a & LIT64 (0x000FFFFFFFFFFFFF);
86 
87 }
88 
89 /*----------------------------------------------------------------------------
90 | Returns the exponent bits of the double-precision floating-point value `a'.
91 *----------------------------------------------------------------------------*/
92 
95 {
96 
97  return (a >> 52) & 0x7FF;
98 
99 }
100 
101 /*----------------------------------------------------------------------------
102 | Returns the sign bit of the double-precision floating-point value `a'.
103 *----------------------------------------------------------------------------*/
104 
105 INLINE flag
107 {
108 
109  return a >> 63;
110 
111 }
112 
113 /*----------------------------------------------------------------------------
114 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
115 | double-precision floating-point value, returning the result. After being
116 | shifted into the proper positions, the three fields are simply added
117 | together to form the result. This means that any integer portion of `zSig'
118 | will be added into the exponent. Since a properly normalized significand
119 | will have an integer portion equal to 1, the `zExp' input should be 1 less
120 | than the desired result exponent whenever `zSig' is a complete, normalized
121 | significand.
122 *----------------------------------------------------------------------------*/
123 
125 packFloat64 (flag zSign, int16 zExp, bits64 zSig)
126 {
127 
128  return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
129 
130 }
131 
132 /*----------------------------------------------------------------------------
133 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
134 | and significand `zSig', and returns the proper double-precision floating-
135 | point value corresponding to the abstract input. Ordinarily, the abstract
136 | value is simply rounded and packed into the double-precision format, with
137 | the inexact exception raised if the abstract input cannot be represented
138 | exactly. However, if the abstract value is too large, the overflow and
139 | inexact exceptions are raised and an infinity or maximal finite value is
140 | returned. If the abstract value is too small, the input value is rounded
141 | to a subnormal number, and the underflow and inexact exceptions are raised
142 | if the abstract input cannot be represented exactly as a subnormal double-
143 | precision floating-point number.
144 | The input significand `zSig' has its binary point between bits 62
145 | and 61, which is 10 bits to the left of the usual location. This shifted
146 | significand must be normalized or smaller. If `zSig' is not normalized,
147 | `zExp' must be 0; in that case, the result returned is a subnormal number,
148 | and it must not require rounding. In the usual case that `zSig' is
149 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
150 | The handling of underflow and overflow follows the IEC/IEEE Standard for
151 | Binary Floating-Point Arithmetic.
152 *----------------------------------------------------------------------------*/
153 
154 static float64
156 {
157  int8 roundingMode;
158  flag roundNearestEven, isTiny;
159  int16 roundIncrement, roundBits;
160 
161  roundingMode = float_rounding_mode;
162  roundNearestEven = (roundingMode == float_round_nearest_even);
163  roundIncrement = 0x200;
164  if (!roundNearestEven)
165  {
166  if (roundingMode == float_round_to_zero)
167  {
168  roundIncrement = 0;
169  }
170  else
171  {
172  roundIncrement = 0x3FF;
173  if (zSign)
174  {
175  if (roundingMode == float_round_up)
176  roundIncrement = 0;
177  }
178  else
179  {
180  if (roundingMode == float_round_down)
181  roundIncrement = 0;
182  }
183  }
184  }
185  roundBits = zSig & 0x3FF;
186  if (0x7FD <= (bits16) zExp)
187  {
188  if ((0x7FD < zExp)
189  || ((zExp == 0x7FD) && ((sbits64) (zSig + roundIncrement) < 0)))
190  {
191  float_raise (float_flag_overflow | float_flag_inexact);
192  return packFloat64 (zSign, 0x7FF, 0) - (roundIncrement == 0);
193  }
194  if (zExp < 0)
195  {
196  isTiny = (float_detect_tininess == float_tininess_before_rounding)
197  || (zExp < -1)
198  || (zSig + roundIncrement < LIT64 (0x8000000000000000));
199  shift64RightJamming (zSig, -zExp, &zSig);
200  zExp = 0;
201  roundBits = zSig & 0x3FF;
202  if (isTiny && roundBits)
203  float_raise (float_flag_underflow);
204  }
205  }
206  if (roundBits)
208  zSig = (zSig + roundIncrement) >> 10;
209  zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
210  if (zSig == 0)
211  zExp = 0;
212  return packFloat64 (zSign, zExp, zSig);
213 
214 }
215 
216 /*----------------------------------------------------------------------------
217 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
218 | and significand `zSig', and returns the proper double-precision floating-
219 | point value corresponding to the abstract input. This routine is just like
220 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
221 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
222 | floating-point exponent.
223 *----------------------------------------------------------------------------*/
224 
225 static float64
227 {
228  int8 shiftCount;
229 
230  shiftCount = countLeadingZeros64 (zSig) - 1;
231  return roundAndPackFloat64 (zSign, zExp - shiftCount, zSig << shiftCount);
232 
233 }
234 
235 /*----------------------------------------------------------------------------
236 | Returns the result of adding the absolute values of the double-precision
237 | floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
238 | before being returned. `zSign' is ignored if the result is a NaN.
239 | The addition is performed according to the IEC/IEEE Standard for Binary
240 | Floating-Point Arithmetic.
241 *----------------------------------------------------------------------------*/
242 
243 static float64
245 {
246  int16 aExp, bExp, zExp;
247  bits64 aSig, bSig, zSig;
248  int16 expDiff;
249 
250  aSig = extractFloat64Frac (a);
251  aExp = extractFloat64Exp (a);
252  bSig = extractFloat64Frac (b);
253  bExp = extractFloat64Exp (b);
254  expDiff = aExp - bExp;
255  aSig <<= 9;
256  bSig <<= 9;
257  if (0 < expDiff)
258  {
259  if (aExp == 0x7FF)
260  {
261  if (aSig)
262  return propagateFloat64NaN (a, b);
263  return a;
264  }
265  if (bExp == 0)
266  --expDiff;
267  else
268  bSig |= LIT64 (0x2000000000000000);
269  shift64RightJamming (bSig, expDiff, &bSig);
270  zExp = aExp;
271  }
272  else if (expDiff < 0)
273  {
274  if (bExp == 0x7FF)
275  {
276  if (bSig)
277  return propagateFloat64NaN (a, b);
278  return packFloat64 (zSign, 0x7FF, 0);
279  }
280  if (aExp == 0)
281  ++expDiff;
282  else
283  {
284  aSig |= LIT64 (0x2000000000000000);
285  }
286  shift64RightJamming (aSig, -expDiff, &aSig);
287  zExp = bExp;
288  }
289  else
290  {
291  if (aExp == 0x7FF)
292  {
293  if (aSig | bSig)
294  return propagateFloat64NaN (a, b);
295  return a;
296  }
297  if (aExp == 0)
298  return packFloat64 (zSign, 0, (aSig + bSig) >> 9);
299  zSig = LIT64 (0x4000000000000000) + aSig + bSig;
300  zExp = aExp;
301  goto roundAndPack;
302  }
303  aSig |= LIT64 (0x2000000000000000);
304  zSig = (aSig + bSig) << 1;
305  --zExp;
306  if ((sbits64) zSig < 0)
307  {
308  zSig = aSig + bSig;
309  ++zExp;
310  }
311 roundAndPack:
312  return roundAndPackFloat64 (zSign, zExp, zSig);
313 
314 }
315 
316 /*----------------------------------------------------------------------------
317 | Returns the result of subtracting the absolute values of the double-
318 | precision floating-point values `a' and `b'. If `zSign' is 1, the
319 | difference is negated before being returned. `zSign' is ignored if the
320 | result is a NaN. The subtraction is performed according to the IEC/IEEE
321 | Standard for Binary Floating-Point Arithmetic.
322 *----------------------------------------------------------------------------*/
323 
324 static float64
326 {
327  int16 aExp, bExp, zExp;
328  bits64 aSig, bSig, zSig;
329  int16 expDiff;
330 
331  aSig = extractFloat64Frac (a);
332  aExp = extractFloat64Exp (a);
333  bSig = extractFloat64Frac (b);
334  bExp = extractFloat64Exp (b);
335  expDiff = aExp - bExp;
336  aSig <<= 10;
337  bSig <<= 10;
338  if (0 < expDiff)
339  goto aExpBigger;
340  if (expDiff < 0)
341  goto bExpBigger;
342  if (aExp == 0x7FF)
343  {
344  if (aSig | bSig)
345  return propagateFloat64NaN (a, b);
346  float_raise (float_flag_invalid);
347  return float64_default_nan;
348  }
349  if (aExp == 0)
350  {
351  aExp = 1;
352  bExp = 1;
353  }
354  if (bSig < aSig)
355  goto aBigger;
356  if (aSig < bSig)
357  goto bBigger;
359 bExpBigger:
360  if (bExp == 0x7FF)
361  {
362  if (bSig)
363  return propagateFloat64NaN (a, b);
364  return packFloat64 (zSign ^ 1, 0x7FF, 0);
365  }
366  if (aExp == 0)
367  ++expDiff;
368  else
369  aSig |= LIT64 (0x4000000000000000);
370  shift64RightJamming (aSig, -expDiff, &aSig);
371  bSig |= LIT64 (0x4000000000000000);
372 bBigger:
373  zSig = bSig - aSig;
374  zExp = bExp;
375  zSign ^= 1;
376  goto normalizeRoundAndPack;
377 aExpBigger:
378  if (aExp == 0x7FF)
379  {
380  if (aSig)
381  return propagateFloat64NaN (a, b);
382  return a;
383  }
384  if (bExp == 0)
385  --expDiff;
386  else
387  bSig |= LIT64 (0x4000000000000000);
388  shift64RightJamming (bSig, expDiff, &bSig);
389  aSig |= LIT64 (0x4000000000000000);
390 aBigger:
391  zSig = aSig - bSig;
392  zExp = aExp;
393 normalizeRoundAndPack:
394  --zExp;
395  return normalizeRoundAndPackFloat64 (zSign, zExp, zSig);
396 
397 }
398 
399 /*----------------------------------------------------------------------------
400 | Returns the result of adding the double-precision floating-point values `a'
401 | and `b'. The operation is performed according to the IEC/IEEE Standard for
402 | Binary Floating-Point Arithmetic.
403 *----------------------------------------------------------------------------*/
404 
405 float64
407 {
408  flag aSign, bSign;
409 
410  aSign = extractFloat64Sign (a);
411  bSign = extractFloat64Sign (b);
412  if (aSign == bSign)
413  return addFloat64Sigs (a, b, aSign);
414  else
415  return subFloat64Sigs (a, b, aSign);
416 
417 }
INLINE int16 extractFloat64Exp(float64 a)
Definition: softfloat.c:94
int int16
Definition: SPARC-GCC.h:60
int flag
Definition: SPARC-GCC.h:58
INLINE flag extractFloat64Sign(float64 a)
Definition: softfloat.c:106
#define LIT64(a)
Definition: SPARC-GCC.h:81
unsigned short int bits16
Definition: SPARC-GCC.h:68
INLINE float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
Definition: softfloat.c:143
#define float_round_up
Definition: softfloat.h:67
static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
Definition: softfloat.c:155
#define float_flag_invalid
Definition: softfloat.h:77
static float64 subFloat64Sigs(float64 a, float64 b, flag zSign)
Definition: softfloat.c:325
float64 float64_add(float64 a, float64 b)
Definition: softfloat.c:406
#define float_flag_overflow
Definition: softfloat.h:76
#define float_flag_underflow
Definition: softfloat.h:75
#define float_tininess_before_rounding
Definition: softfloat.h:60
#define float_round_down
Definition: softfloat.h:68
unsigned long long float64
Definition: softfloat.h:54
int8 float_rounding_mode
Definition: softfloat.c:57
int int8
Definition: SPARC-GCC.h:59
static float64 addFloat64Sigs(float64 a, float64 b, flag zSign)
Definition: softfloat.c:244
static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
Definition: softfloat.c:226
int8 float_exception_flags
Definition: softfloat.c:58
#define float_round_nearest_even
Definition: softfloat.h:65
signed long long int sbits64
Definition: SPARC-GCC.h:71
#define float_flag_inexact
Definition: softfloat.h:73
#define float_round_to_zero
Definition: softfloat.h:66
INLINE bits64 extractFloat64Frac(float64 a)
Definition: softfloat.c:82
unsigned long long int bits64
Definition: SPARC-GCC.h:70

Generated on Mon Feb 12 2024 13:02:48 for PandA-2024.02 by doxygen 1.8.13