PandA-2024.02
softfloat.c
Go to the documentation of this file.
1 /*
2 +--------------------------------------------------------------------------+
3 | CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
4 | ======================================================================== |
5 | |
6 | * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
7 | H. Takada and K. Ishii |
8 | Nagoya University, Japan |
9 | |
10 | * Remark : |
11 | 1. This source code is modified to unify the formats of the benchmark |
12 | programs in CHStone. |
13 | 2. Test vectors are added for CHStone. |
14 | 3. If "main_result" is 0 at the end of the program, the program is |
15 | correctly executed. |
16 | 4. Please follow the copyright of each benchmark program. |
17 +--------------------------------------------------------------------------+
18 */
19 /*============================================================================
20 
21 This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
22 Package, Release 2b.
23 
24 Written by John R. Hauser. This work was made possible in part by the
25 International Computer Science Institute, located at Suite 600, 1947 Center
26 Street, Berkeley, California 94704. Funding was partially provided by the
27 National Science Foundation under grant MIP-9311980. The original version
28 of this code was written as part of a project to build a fixed-point vector
29 processor in collaboration with the University of California at Berkeley,
30 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
31 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
32 arithmetic/SoftFloat.html'.
33 
34 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
35 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
36 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
37 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
38 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
39 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
40 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
41 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
42 
43 Derivative works are acceptable, even for commercial purposes, so long as
44 (1) the source code for the derivative work includes prominent notice that
45 the work is derivative, and (2) the source code includes prominent notice with
46 these four paragraphs for those parts of this code that are retained.
47 
48 =============================================================================*/
49 
50 #include "milieu.h"
51 #include "softfloat.h"
52 
53 /*----------------------------------------------------------------------------
54 | Floating-point rounding mode, extended double-precision rounding precision,
55 | and exception flags.
56 *----------------------------------------------------------------------------*/
59 
60 /*----------------------------------------------------------------------------
61 | Primitive arithmetic functions, including multi-word arithmetic, and
62 | division and square root approximations. (Can be specialized to target if
63 | desired.)
64 *----------------------------------------------------------------------------*/
65 #include "softfloat-macros"
66 
67 /*----------------------------------------------------------------------------
68 | Functions and definitions to determine: (1) whether tininess for underflow
69 | is detected before or after rounding by default, (2) what (if anything)
70 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
71 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
72 | are propagated from function inputs to output. These details are target-
73 | specific.
74 *----------------------------------------------------------------------------*/
75 #include "softfloat-specialize"
76 
77 /*----------------------------------------------------------------------------
78 | Returns the fraction bits of the double-precision floating-point value `a'.
79 *----------------------------------------------------------------------------*/
80 
83 {
84 
85  return a & LIT64 (0x000FFFFFFFFFFFFF);
86 
87 }
88 
89 /*----------------------------------------------------------------------------
90 | Returns the exponent bits of the double-precision floating-point value `a'.
91 *----------------------------------------------------------------------------*/
92 
95 {
96 
97  return (a >> 52) & 0x7FF;
98 
99 }
100 
101 /*----------------------------------------------------------------------------
102 | Returns the sign bit of the double-precision floating-point value `a'.
103 *----------------------------------------------------------------------------*/
104 
105 INLINE flag
107 {
108 
109  return a >> 63;
110 
111 }
112 
113 /*----------------------------------------------------------------------------
114 | Normalizes the subnormal double-precision floating-point value represented
115 | by the denormalized significand `aSig'. The normalized exponent and
116 | significand are stored at the locations pointed to by `zExpPtr' and
117 | `zSigPtr', respectively.
118 *----------------------------------------------------------------------------*/
119 
120 static void
121 normalizeFloat64Subnormal (bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr)
122 {
123  int8 shiftCount;
124 
125  shiftCount = countLeadingZeros64 (aSig) - 11;
126  *zSigPtr = aSig << shiftCount;
127  *zExpPtr = 1 - shiftCount;
128 
129 }
130 
131 /*----------------------------------------------------------------------------
132 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
133 | double-precision floating-point value, returning the result. After being
134 | shifted into the proper positions, the three fields are simply added
135 | together to form the result. This means that any integer portion of `zSig'
136 | will be added into the exponent. Since a properly normalized significand
137 | will have an integer portion equal to 1, the `zExp' input should be 1 less
138 | than the desired result exponent whenever `zSig' is a complete, normalized
139 | significand.
140 *----------------------------------------------------------------------------*/
141 
143 packFloat64 (flag zSign, int16 zExp, bits64 zSig)
144 {
145 
146  return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
147 
148 }
149 
150 /*----------------------------------------------------------------------------
151 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
152 | and significand `zSig', and returns the proper double-precision floating-
153 | point value corresponding to the abstract input. Ordinarily, the abstract
154 | value is simply rounded and packed into the double-precision format, with
155 | the inexact exception raised if the abstract input cannot be represented
156 | exactly. However, if the abstract value is too large, the overflow and
157 | inexact exceptions are raised and an infinity or maximal finite value is
158 | returned. If the abstract value is too small, the input value is rounded
159 | to a subnormal number, and the underflow and inexact exceptions are raised
160 | if the abstract input cannot be represented exactly as a subnormal double-
161 | precision floating-point number.
162 | The input significand `zSig' has its binary point between bits 62
163 | and 61, which is 10 bits to the left of the usual location. This shifted
164 | significand must be normalized or smaller. If `zSig' is not normalized,
165 | `zExp' must be 0; in that case, the result returned is a subnormal number,
166 | and it must not require rounding. In the usual case that `zSig' is
167 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
168 | The handling of underflow and overflow follows the IEC/IEEE Standard for
169 | Binary Floating-Point Arithmetic.
170 *----------------------------------------------------------------------------*/
171 
172 static float64
174 {
175  int8 roundingMode;
176  flag roundNearestEven, isTiny;
177  int16 roundIncrement, roundBits;
178 
179  roundingMode = float_rounding_mode;
180  roundNearestEven = (roundingMode == float_round_nearest_even);
181  roundIncrement = 0x200;
182  if (!roundNearestEven)
183  {
184  if (roundingMode == float_round_to_zero)
185  {
186  roundIncrement = 0;
187  }
188  else
189  {
190  roundIncrement = 0x3FF;
191  if (zSign)
192  {
193  if (roundingMode == float_round_up)
194  roundIncrement = 0;
195  }
196  else
197  {
198  if (roundingMode == float_round_down)
199  roundIncrement = 0;
200  }
201  }
202  }
203  roundBits = zSig & 0x3FF;
204  if (0x7FD <= (bits16) zExp)
205  {
206  if ((0x7FD < zExp)
207  || ((zExp == 0x7FD) && ((sbits64) (zSig + roundIncrement) < 0)))
208  {
209  float_raise (float_flag_overflow | float_flag_inexact);
210  return packFloat64 (zSign, 0x7FF, 0) - (roundIncrement == 0);
211  }
212  if (zExp < 0)
213  {
214  isTiny = (float_detect_tininess == float_tininess_before_rounding)
215  || (zExp < -1)
216  || (zSig + roundIncrement < LIT64 (0x8000000000000000));
217  shift64RightJamming (zSig, -zExp, &zSig);
218  zExp = 0;
219  roundBits = zSig & 0x3FF;
220  if (isTiny && roundBits)
221  float_raise (float_flag_underflow);
222  }
223  }
224  if (roundBits)
226  zSig = (zSig + roundIncrement) >> 10;
227  zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
228  if (zSig == 0)
229  zExp = 0;
230  return packFloat64 (zSign, zExp, zSig);
231 
232 }
233 
234 /*----------------------------------------------------------------------------
235 | Returns the result of multiplying the double-precision floating-point values
236 | `a' and `b'. The operation is performed according to the IEC/IEEE Standard
237 | for Binary Floating-Point Arithmetic.
238 *----------------------------------------------------------------------------*/
239 
240 float64
241 __attribute__ ((noinline))
243 {
244  flag aSign, bSign, zSign;
245  int16 aExp, bExp, zExp;
246  bits64 aSig, bSig, zSig0, zSig1;
247 
248  aSig = extractFloat64Frac (a);
249  aExp = extractFloat64Exp (a);
250  aSign = extractFloat64Sign (a);
251  bSig = extractFloat64Frac (b);
252  bExp = extractFloat64Exp (b);
253  bSign = extractFloat64Sign (b);
254  zSign = aSign ^ bSign;
255  if (aExp == 0x7FF)
256  {
257  if (aSig || ((bExp == 0x7FF) && bSig))
258  return propagateFloat64NaN (a, b);
259  if ((bExp | bSig) == 0)
260  {
261  float_raise (float_flag_invalid);
262  return float64_default_nan;
263  }
264  return packFloat64 (zSign, 0x7FF, 0);
265  }
266  if (bExp == 0x7FF)
267  {
268  if (bSig)
269  return propagateFloat64NaN (a, b);
270  if ((aExp | aSig) == 0)
271  {
272  float_raise (float_flag_invalid);
273  return float64_default_nan;
274  }
275  return packFloat64 (zSign, 0x7FF, 0);
276  }
277  if (aExp == 0)
278  {
279  if (aSig == 0)
280  return packFloat64 (zSign, 0, 0);
281  normalizeFloat64Subnormal (aSig, &aExp, &aSig);
282  }
283  if (bExp == 0)
284  {
285  if (bSig == 0)
286  return packFloat64 (zSign, 0, 0);
287  normalizeFloat64Subnormal (bSig, &bExp, &bSig);
288  }
289  zExp = aExp + bExp - 0x3FF;
290  aSig = (aSig | LIT64 (0x0010000000000000)) << 10;
291  bSig = (bSig | LIT64 (0x0010000000000000)) << 11;
292  mul64To128 (aSig, bSig, &zSig0, &zSig1);
293  zSig0 |= (zSig1 != 0);
294  if (0 <= (sbits64) (zSig0 << 1))
295  {
296  zSig0 <<= 1;
297  --zExp;
298  }
299  return roundAndPackFloat64 (zSign, zExp, zSig0);
300 
301 }
INLINE int16 extractFloat64Exp(float64 a)
Definition: softfloat.c:94
int int16
Definition: SPARC-GCC.h:60
int flag
Definition: SPARC-GCC.h:58
INLINE flag extractFloat64Sign(float64 a)
Definition: softfloat.c:106
#define LIT64(a)
Definition: SPARC-GCC.h:81
unsigned short int bits16
Definition: SPARC-GCC.h:68
INLINE float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
Definition: softfloat.c:143
#define float_round_up
Definition: softfloat.h:67
float64 float64_mul(float64 a, float64 b)
Definition: softfloat.c:241
static void normalizeFloat64Subnormal(bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr)
Definition: softfloat.c:121
#define float_flag_invalid
Definition: softfloat.h:77
static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
Definition: softfloat.c:173
#define float_flag_overflow
Definition: softfloat.h:76
#define float_flag_underflow
Definition: softfloat.h:75
#define float_tininess_before_rounding
Definition: softfloat.h:60
#define float_round_down
Definition: softfloat.h:68
unsigned long long float64
Definition: softfloat.h:54
float64 __attribute__((noinline))
Definition: softfloat.c:406
int8 float_rounding_mode
Definition: softfloat.c:57
int int8
Definition: SPARC-GCC.h:59
int8 float_exception_flags
Definition: softfloat.c:58
#define float_round_nearest_even
Definition: softfloat.h:65
signed long long int sbits64
Definition: SPARC-GCC.h:71
#define float_flag_inexact
Definition: softfloat.h:73
#define float_round_to_zero
Definition: softfloat.h:66
INLINE bits64 extractFloat64Frac(float64 a)
Definition: softfloat.c:82
unsigned long long int bits64
Definition: SPARC-GCC.h:70

Generated on Mon Feb 12 2024 13:02:48 for PandA-2024.02 by doxygen 1.8.13