PandA-2024.02
softfloat.c
Go to the documentation of this file.
1 /*
2 +--------------------------------------------------------------------------+
3 | CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
4 | ======================================================================== |
5 | |
6 | * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
7 | H. Takada and K. Ishii |
8 | Nagoya University, Japan |
9 | |
10 | * Remark : |
11 | 1. This source code is modified to unify the formats of the benchmark |
12 | programs in CHStone. |
13 | 2. Test vectors are added for CHStone. |
14 | 3. If "main_result" is 0 at the end of the program, the program is |
15 | correctly executed. |
16 | 4. Please follow the copyright of each benchmark program. |
17 +--------------------------------------------------------------------------+
18 */
19 /*============================================================================
20 
21 This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
22 Package, Release 2b.
23 
24 Written by John R. Hauser. This work was made possible in part by the
25 International Computer Science Institute, located at Suite 600, 1947 Center
26 Street, Berkeley, California 94704. Funding was partially provided by the
27 National Science Foundation under grant MIP-9311980. The original version
28 of this code was written as part of a project to build a fixed-point vector
29 processor in collaboration with the University of California at Berkeley,
30 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
31 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
32 arithmetic/SoftFloat.html'.
33 
34 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
35 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
36 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
37 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
38 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
39 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
40 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
41 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
42 
43 Derivative works are acceptable, even for commercial purposes, so long as
44 (1) the source code for the derivative work includes prominent notice that
45 the work is derivative, and (2) the source code includes prominent notice with
46 these four paragraphs for those parts of this code that are retained.
47 
48 =============================================================================*/
49 
50 #include "milieu.h"
51 #include "softfloat.h"
52 
53 /*----------------------------------------------------------------------------
54 | Floating-point rounding mode, extended double-precision rounding precision,
55 | and exception flags.
56 *----------------------------------------------------------------------------*/
59 
60 /*----------------------------------------------------------------------------
61 | Primitive arithmetic functions, including multi-word arithmetic, and
62 | division and square root approximations. (Can be specialized to target if
63 | desired.)
64 *----------------------------------------------------------------------------*/
65 #include "softfloat-macros"
66 
67 /*----------------------------------------------------------------------------
68 | Functions and definitions to determine: (1) whether tininess for underflow
69 | is detected before or after rounding by default, (2) what (if anything)
70 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
71 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
72 | are propagated from function inputs to output. These details are target-
73 | specific.
74 *----------------------------------------------------------------------------*/
75 #include "softfloat-specialize"
76 
77 /*----------------------------------------------------------------------------
78 | Returns the fraction bits of the double-precision floating-point value `a'.
79 *----------------------------------------------------------------------------*/
80 
83 {
84 
85  return a & LIT64 (0x000FFFFFFFFFFFFF);
86 
87 }
88 
89 /*----------------------------------------------------------------------------
90 | Returns the exponent bits of the double-precision floating-point value `a'.
91 *----------------------------------------------------------------------------*/
92 
95 {
96 
97  return (a >> 52) & 0x7FF;
98 
99 }
100 
101 /*----------------------------------------------------------------------------
102 | Returns the sign bit of the double-precision floating-point value `a'.
103 *----------------------------------------------------------------------------*/
104 
105 INLINE flag
107 {
108 
109  return a >> 63;
110 
111 }
112 
113 /*----------------------------------------------------------------------------
114 | Normalizes the subnormal double-precision floating-point value represented
115 | by the denormalized significand `aSig'. The normalized exponent and
116 | significand are stored at the locations pointed to by `zExpPtr' and
117 | `zSigPtr', respectively.
118 *----------------------------------------------------------------------------*/
119 
120 static void
121 normalizeFloat64Subnormal (bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr)
122 {
123  int8 shiftCount;
124 
125  shiftCount = countLeadingZeros64 (aSig) - 11;
126  *zSigPtr = aSig << shiftCount;
127  *zExpPtr = 1 - shiftCount;
128 
129 }
130 
131 /*----------------------------------------------------------------------------
132 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
133 | double-precision floating-point value, returning the result. After being
134 | shifted into the proper positions, the three fields are simply added
135 | together to form the result. This means that any integer portion of `zSig'
136 | will be added into the exponent. Since a properly normalized significand
137 | will have an integer portion equal to 1, the `zExp' input should be 1 less
138 | than the desired result exponent whenever `zSig' is a complete, normalized
139 | significand.
140 *----------------------------------------------------------------------------*/
141 
143 packFloat64 (flag zSign, int16 zExp, bits64 zSig)
144 {
145 
146  return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
147 
148 }
149 
150 /*----------------------------------------------------------------------------
151 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
152 | and significand `zSig', and returns the proper double-precision floating-
153 | point value corresponding to the abstract input. Ordinarily, the abstract
154 | value is simply rounded and packed into the double-precision format, with
155 | the inexact exception raised if the abstract input cannot be represented
156 | exactly. However, if the abstract value is too large, the overflow and
157 | inexact exceptions are raised and an infinity or maximal finite value is
158 | returned. If the abstract value is too small, the input value is rounded
159 | to a subnormal number, and the underflow and inexact exceptions are raised
160 | if the abstract input cannot be represented exactly as a subnormal double-
161 | precision floating-point number.
162 | The input significand `zSig' has its binary point between bits 62
163 | and 61, which is 10 bits to the left of the usual location. This shifted
164 | significand must be normalized or smaller. If `zSig' is not normalized,
165 | `zExp' must be 0; in that case, the result returned is a subnormal number,
166 | and it must not require rounding. In the usual case that `zSig' is
167 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
168 | The handling of underflow and overflow follows the IEC/IEEE Standard for
169 | Binary Floating-Point Arithmetic.
170 *----------------------------------------------------------------------------*/
171 
172 static float64
174 {
175  int8 roundingMode;
176  flag roundNearestEven, isTiny;
177  int16 roundIncrement, roundBits;
178 
179  roundingMode = float_rounding_mode;
180  roundNearestEven = (roundingMode == float_round_nearest_even);
181  roundIncrement = 0x200;
182  if (!roundNearestEven)
183  {
184  if (roundingMode == float_round_to_zero)
185  {
186  roundIncrement = 0;
187  }
188  else
189  {
190  roundIncrement = 0x3FF;
191  if (zSign)
192  {
193  if (roundingMode == float_round_up)
194  roundIncrement = 0;
195  }
196  else
197  {
198  if (roundingMode == float_round_down)
199  roundIncrement = 0;
200  }
201  }
202  }
203  roundBits = zSig & 0x3FF;
204  if (0x7FD <= (bits16) zExp)
205  {
206  if ((0x7FD < zExp)
207  || ((zExp == 0x7FD) && ((sbits64) (zSig + roundIncrement) < 0)))
208  {
209  float_raise (float_flag_overflow | float_flag_inexact);
210  return packFloat64 (zSign, 0x7FF, 0) - (roundIncrement == 0);
211  }
212  if (zExp < 0)
213  {
214  isTiny = (float_detect_tininess == float_tininess_before_rounding)
215  || (zExp < -1)
216  || (zSig + roundIncrement < LIT64 (0x8000000000000000));
217  shift64RightJamming (zSig, -zExp, &zSig);
218  zExp = 0;
219  roundBits = zSig & 0x3FF;
220  if (isTiny && roundBits)
221  float_raise (float_flag_underflow);
222  }
223  }
224  if (roundBits)
226  zSig = (zSig + roundIncrement) >> 10;
227  zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
228  if (zSig == 0)
229  zExp = 0;
230  return packFloat64 (zSign, zExp, zSig);
231 
232 }
233 
234 /*----------------------------------------------------------------------------
235 | Returns the result of dividing the double-precision floating-point value `a'
236 | by the corresponding value `b'. The operation is performed according to
237 | the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
238 *----------------------------------------------------------------------------*/
239 
240 float64
242 {
243  flag aSign, bSign, zSign;
244  int16 aExp, bExp, zExp;
245  bits64 aSig, bSig, zSig;
246  bits64 rem0, rem1, term0, term1;
247 
248  aSig = extractFloat64Frac (a);
249  aExp = extractFloat64Exp (a);
250  aSign = extractFloat64Sign (a);
251  bSig = extractFloat64Frac (b);
252  bExp = extractFloat64Exp (b);
253  bSign = extractFloat64Sign (b);
254  zSign = aSign ^ bSign;
255  if (aExp == 0x7FF)
256  {
257  if (aSig)
258  return propagateFloat64NaN (a, b);
259  if (bExp == 0x7FF)
260  {
261  if (bSig)
262  return propagateFloat64NaN (a, b);
263  float_raise (float_flag_invalid);
264  return float64_default_nan;
265  }
266  return packFloat64 (zSign, 0x7FF, 0);
267  }
268  if (bExp == 0x7FF)
269  {
270  if (bSig)
271  return propagateFloat64NaN (a, b);
272  return packFloat64 (zSign, 0, 0);
273  }
274  if (bExp == 0)
275  {
276  if (bSig == 0)
277  {
278  if ((aExp | aSig) == 0)
279  {
280  float_raise (float_flag_invalid);
281  return float64_default_nan;
282  }
283  float_raise (float_flag_divbyzero);
284  return packFloat64 (zSign, 0x7FF, 0);
285  }
286  normalizeFloat64Subnormal (bSig, &bExp, &bSig);
287  }
288  if (aExp == 0)
289  {
290  if (aSig == 0)
291  return packFloat64 (zSign, 0, 0);
292  normalizeFloat64Subnormal (aSig, &aExp, &aSig);
293  }
294  zExp = aExp - bExp + 0x3FD;
295  aSig = (aSig | LIT64 (0x0010000000000000)) << 10;
296  bSig = (bSig | LIT64 (0x0010000000000000)) << 11;
297  if (bSig <= (aSig + aSig))
298  {
299  aSig >>= 1;
300  ++zExp;
301  }
302  zSig = estimateDiv128To64 (aSig, 0, bSig);
303  if ((zSig & 0x1FF) <= 2)
304  {
305  mul64To128 (bSig, zSig, &term0, &term1);
306  sub128 (aSig, 0, term0, term1, &rem0, &rem1);
307  while ((sbits64) rem0 < 0)
308  {
309  --zSig;
310  add128 (rem0, rem1, 0, bSig, &rem0, &rem1);
311  }
312  zSig |= (rem1 != 0);
313  }
314  return roundAndPackFloat64 (zSign, zExp, zSig);
315 
316 }
float64 float64_div(float64 a, float64 b)
Definition: softfloat.c:241
INLINE int16 extractFloat64Exp(float64 a)
Definition: softfloat.c:94
int int16
Definition: SPARC-GCC.h:60
int flag
Definition: SPARC-GCC.h:58
INLINE flag extractFloat64Sign(float64 a)
Definition: softfloat.c:106
#define LIT64(a)
Definition: SPARC-GCC.h:81
#define float_flag_divbyzero
Definition: softfloat.h:74
unsigned short int bits16
Definition: SPARC-GCC.h:68
INLINE float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
Definition: softfloat.c:143
static void normalizeFloat64Subnormal(bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr)
Definition: softfloat.c:121
#define float_round_up
Definition: softfloat.h:67
#define float_flag_invalid
Definition: softfloat.h:77
#define float_flag_overflow
Definition: softfloat.h:76
#define float_flag_underflow
Definition: softfloat.h:75
static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
Definition: softfloat.c:173
#define float_tininess_before_rounding
Definition: softfloat.h:60
#define float_round_down
Definition: softfloat.h:68
unsigned long long float64
Definition: softfloat.h:54
int8 float_rounding_mode
Definition: softfloat.c:57
int int8
Definition: SPARC-GCC.h:59
int8 float_exception_flags
Definition: softfloat.c:58
#define float_round_nearest_even
Definition: softfloat.h:65
signed long long int sbits64
Definition: SPARC-GCC.h:71
#define float_flag_inexact
Definition: softfloat.h:73
#define float_round_to_zero
Definition: softfloat.h:66
INLINE bits64 extractFloat64Frac(float64 a)
Definition: softfloat.c:82
unsigned long long int bits64
Definition: SPARC-GCC.h:70

Generated on Mon Feb 12 2024 13:02:48 for PandA-2024.02 by doxygen 1.8.13