PandA-2024.02
softfloat.c
Go to the documentation of this file.
1 /*
2 +--------------------------------------------------------------------------+
3 | CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
4 | ======================================================================== |
5 | |
6 | * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
7 | H. Takada and K. Ishii |
8 | Nagoya University, Japan |
9 | |
10 | * Remark : |
11 | 1. This source code is modified to unify the formats of the benchmark |
12 | programs in CHStone. |
13 | 2. Test vectors are added for CHStone. |
14 | 3. If "main_result" is 0 at the end of the program, the program is |
15 | correctly executed. |
16 | 4. Please follow the copyright of each benchmark program. |
17 +--------------------------------------------------------------------------+
18 */
19 /*============================================================================
20 
21 This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
22 Package, Release 2b.
23 
24 Written by John R. Hauser. This work was made possible in part by the
25 International Computer Science Institute, located at Suite 600, 1947 Center
26 Street, Berkeley, California 94704. Funding was partially provided by the
27 National Science Foundation under grant MIP-9311980. The original version
28 of this code was written as part of a project to build a fixed-point vector
29 processor in collaboration with the University of California at Berkeley,
30 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
31 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
32 arithmetic/SoftFloat.html'.
33 
34 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
35 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
36 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
37 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
38 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
39 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
40 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
41 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
42 
43 Derivative works are acceptable, even for commercial purposes, so long as
44 (1) the source code for the derivative work includes prominent notice that
45 the work is derivative, and (2) the source code includes prominent notice with
46 these four paragraphs for those parts of this code that are retained.
47 
48 =============================================================================*/
49 
50 #include "milieu.h"
51 #include "softfloat.h"
52 
53 /*----------------------------------------------------------------------------
54 | Floating-point rounding mode, extended double-precision rounding precision,
55 | and exception flags.
56 *----------------------------------------------------------------------------*/
59 
60 /*----------------------------------------------------------------------------
61 | Primitive arithmetic functions, including multi-word arithmetic, and
62 | division and square root approximations. (Can be specialized to target if
63 | desired.)
64 *----------------------------------------------------------------------------*/
65 #include "softfloat-macros"
66 
67 /*----------------------------------------------------------------------------
68 | Functions and definitions to determine: (1) whether tininess for underflow
69 | is detected before or after rounding by default, (2) what (if anything)
70 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
71 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
72 | are propagated from function inputs to output. These details are target-
73 | specific.
74 *----------------------------------------------------------------------------*/
75 #include "softfloat-specialize"
76 
77 /*----------------------------------------------------------------------------
78 | Returns the fraction bits of the double-precision floating-point value `a'.
79 *----------------------------------------------------------------------------*/
80 
83 {
84 
85  return a & LIT64 (0x000FFFFFFFFFFFFF);
86 
87 }
88 
89 /*----------------------------------------------------------------------------
90 | Returns the exponent bits of the double-precision floating-point value `a'.
91 *----------------------------------------------------------------------------*/
92 
95 {
96 
97  return (a >> 52) & 0x7FF;
98 
99 }
100 
101 /*----------------------------------------------------------------------------
102 | Returns the sign bit of the double-precision floating-point value `a'.
103 *----------------------------------------------------------------------------*/
104 
105 INLINE flag
107 {
108 
109  return a >> 63;
110 
111 }
112 
113 /*----------------------------------------------------------------------------
114 | Normalizes the subnormal double-precision floating-point value represented
115 | by the denormalized significand `aSig'. The normalized exponent and
116 | significand are stored at the locations pointed to by `zExpPtr' and
117 | `zSigPtr', respectively.
118 *----------------------------------------------------------------------------*/
119 
120 static void
121 normalizeFloat64Subnormal (bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr)
122 {
123  int8 shiftCount;
124 
125  shiftCount = countLeadingZeros64 (aSig) - 11;
126  *zSigPtr = aSig << shiftCount;
127  *zExpPtr = 1 - shiftCount;
128 
129 }
130 
131 /*----------------------------------------------------------------------------
132 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
133 | double-precision floating-point value, returning the result. After being
134 | shifted into the proper positions, the three fields are simply added
135 | together to form the result. This means that any integer portion of `zSig'
136 | will be added into the exponent. Since a properly normalized significand
137 | will have an integer portion equal to 1, the `zExp' input should be 1 less
138 | than the desired result exponent whenever `zSig' is a complete, normalized
139 | significand.
140 *----------------------------------------------------------------------------*/
141 
143 packFloat64 (flag zSign, int16 zExp, bits64 zSig)
144 {
145 
146  return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
147 
148 }
149 
150 /*----------------------------------------------------------------------------
151 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
152 | and significand `zSig', and returns the proper double-precision floating-
153 | point value corresponding to the abstract input. Ordinarily, the abstract
154 | value is simply rounded and packed into the double-precision format, with
155 | the inexact exception raised if the abstract input cannot be represented
156 | exactly. However, if the abstract value is too large, the overflow and
157 | inexact exceptions are raised and an infinity or maximal finite value is
158 | returned. If the abstract value is too small, the input value is rounded
159 | to a subnormal number, and the underflow and inexact exceptions are raised
160 | if the abstract input cannot be represented exactly as a subnormal double-
161 | precision floating-point number.
162 | The input significand `zSig' has its binary point between bits 62
163 | and 61, which is 10 bits to the left of the usual location. This shifted
164 | significand must be normalized or smaller. If `zSig' is not normalized,
165 | `zExp' must be 0; in that case, the result returned is a subnormal number,
166 | and it must not require rounding. In the usual case that `zSig' is
167 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
168 | The handling of underflow and overflow follows the IEC/IEEE Standard for
169 | Binary Floating-Point Arithmetic.
170 *----------------------------------------------------------------------------*/
171 
172 static float64
174 {
175  int8 roundingMode;
176  flag roundNearestEven, isTiny;
177  int16 roundIncrement, roundBits;
178 
179  roundingMode = float_rounding_mode;
180  roundNearestEven = (roundingMode == float_round_nearest_even);
181  roundIncrement = 0x200;
182  if (!roundNearestEven)
183  {
184  if (roundingMode == float_round_to_zero)
185  {
186  roundIncrement = 0;
187  }
188  else
189  {
190  roundIncrement = 0x3FF;
191  if (zSign)
192  {
193  if (roundingMode == float_round_up)
194  roundIncrement = 0;
195  }
196  else
197  {
198  if (roundingMode == float_round_down)
199  roundIncrement = 0;
200  }
201  }
202  }
203  roundBits = zSig & 0x3FF;
204  if (0x7FD <= (bits16) zExp)
205  {
206  if ((0x7FD < zExp)
207  || ((zExp == 0x7FD) && ((sbits64) (zSig + roundIncrement) < 0)))
208  {
209  float_raise (float_flag_overflow | float_flag_inexact);
210  return packFloat64 (zSign, 0x7FF, 0) - (roundIncrement == 0);
211  }
212  if (zExp < 0)
213  {
214  isTiny = (float_detect_tininess == float_tininess_before_rounding)
215  || (zExp < -1)
216  || (zSig + roundIncrement < LIT64 (0x8000000000000000));
217  shift64RightJamming (zSig, -zExp, &zSig);
218  zExp = 0;
219  roundBits = zSig & 0x3FF;
220  if (isTiny && roundBits)
221  float_raise (float_flag_underflow);
222  }
223  }
224  if (roundBits)
226  zSig = (zSig + roundIncrement) >> 10;
227  zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
228  if (zSig == 0)
229  zExp = 0;
230  return packFloat64 (zSign, zExp, zSig);
231 
232 }
233 
234 /*----------------------------------------------------------------------------
235 | Returns the result of dividing the double-precision floating-point value `a'
236 | by the corresponding value `b'. The operation is performed according to
237 | the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
238 *----------------------------------------------------------------------------*/
239 
240 float64
241 __attribute__ ((noinline))
243 {
244  flag aSign, bSign, zSign;
245  int16 aExp, bExp, zExp;
246  bits64 aSig, bSig, zSig;
247  bits64 rem0, rem1, term0, term1;
248 
249  aSig = extractFloat64Frac (a);
250  aExp = extractFloat64Exp (a);
251  aSign = extractFloat64Sign (a);
252  bSig = extractFloat64Frac (b);
253  bExp = extractFloat64Exp (b);
254  bSign = extractFloat64Sign (b);
255  zSign = aSign ^ bSign;
256  if (aExp == 0x7FF)
257  {
258  if (aSig)
259  return propagateFloat64NaN (a, b);
260  if (bExp == 0x7FF)
261  {
262  if (bSig)
263  return propagateFloat64NaN (a, b);
264  float_raise (float_flag_invalid);
265  return float64_default_nan;
266  }
267  return packFloat64 (zSign, 0x7FF, 0);
268  }
269  if (bExp == 0x7FF)
270  {
271  if (bSig)
272  return propagateFloat64NaN (a, b);
273  return packFloat64 (zSign, 0, 0);
274  }
275  if (bExp == 0)
276  {
277  if (bSig == 0)
278  {
279  if ((aExp | aSig) == 0)
280  {
281  float_raise (float_flag_invalid);
282  return float64_default_nan;
283  }
284  float_raise (float_flag_divbyzero);
285  return packFloat64 (zSign, 0x7FF, 0);
286  }
287  normalizeFloat64Subnormal (bSig, &bExp, &bSig);
288  }
289  if (aExp == 0)
290  {
291  if (aSig == 0)
292  return packFloat64 (zSign, 0, 0);
293  normalizeFloat64Subnormal (aSig, &aExp, &aSig);
294  }
295  zExp = aExp - bExp + 0x3FD;
296  aSig = (aSig | LIT64 (0x0010000000000000)) << 10;
297  bSig = (bSig | LIT64 (0x0010000000000000)) << 11;
298  if (bSig <= (aSig + aSig))
299  {
300  aSig >>= 1;
301  ++zExp;
302  }
303  zSig = estimateDiv128To64 (aSig, 0, bSig);
304  if ((zSig & 0x1FF) <= 2)
305  {
306  mul64To128 (bSig, zSig, &term0, &term1);
307  sub128 (aSig, 0, term0, term1, &rem0, &rem1);
308  while ((sbits64) rem0 < 0)
309  {
310  --zSig;
311  add128 (rem0, rem1, 0, bSig, &rem0, &rem1);
312  }
313  zSig |= (rem1 != 0);
314  }
315  return roundAndPackFloat64 (zSign, zExp, zSig);
316 
317 }
float64 float64_div(float64 a, float64 b)
Definition: softfloat.c:241
INLINE int16 extractFloat64Exp(float64 a)
Definition: softfloat.c:94
int int16
Definition: SPARC-GCC.h:60
int flag
Definition: SPARC-GCC.h:58
INLINE flag extractFloat64Sign(float64 a)
Definition: softfloat.c:106
#define LIT64(a)
Definition: SPARC-GCC.h:81
#define float_flag_divbyzero
Definition: softfloat.h:74
unsigned short int bits16
Definition: SPARC-GCC.h:68
INLINE float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
Definition: softfloat.c:143
static void normalizeFloat64Subnormal(bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr)
Definition: softfloat.c:121
#define float_round_up
Definition: softfloat.h:67
#define float_flag_invalid
Definition: softfloat.h:77
#define float_flag_overflow
Definition: softfloat.h:76
#define float_flag_underflow
Definition: softfloat.h:75
#define float_tininess_before_rounding
Definition: softfloat.h:60
#define float_round_down
Definition: softfloat.h:68
unsigned long long float64
Definition: softfloat.h:54
float64 __attribute__((noinline))
Definition: softfloat.c:406
int8 float_rounding_mode
Definition: softfloat.c:57
int int8
Definition: SPARC-GCC.h:59
static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
Definition: softfloat.c:173
int8 float_exception_flags
Definition: softfloat.c:58
#define float_round_nearest_even
Definition: softfloat.h:65
signed long long int sbits64
Definition: SPARC-GCC.h:71
#define float_flag_inexact
Definition: softfloat.h:73
#define float_round_to_zero
Definition: softfloat.h:66
INLINE bits64 extractFloat64Frac(float64 a)
Definition: softfloat.c:82
unsigned long long int bits64
Definition: SPARC-GCC.h:70

Generated on Mon Feb 12 2024 13:02:48 for PandA-2024.02 by doxygen 1.8.13