65 #include "softfloat-macros" 75 #include "softfloat-specialize" 85 return a &
LIT64 (0x000FFFFFFFFFFFFF);
97 return (a >> 52) & 0x7FF;
125 shiftCount = countLeadingZeros64 (aSig) - 11;
126 *zSigPtr = aSig << shiftCount;
127 *zExpPtr = 1 - shiftCount;
146 return (((
bits64) zSign) << 63) + (((
bits64) zExp) << 52) + zSig;
176 flag roundNearestEven, isTiny;
177 int16 roundIncrement, roundBits;
181 roundIncrement = 0x200;
182 if (!roundNearestEven)
190 roundIncrement = 0x3FF;
203 roundBits = zSig & 0x3FF;
204 if (0x7FD <= (
bits16) zExp)
207 || ((zExp == 0x7FD) && ((
sbits64) (zSig + roundIncrement) < 0)))
210 return packFloat64 (zSign, 0x7FF, 0) - (roundIncrement == 0);
216 || (zSig + roundIncrement <
LIT64 (0x8000000000000000));
217 shift64RightJamming (zSig, -zExp, &zSig);
219 roundBits = zSig & 0x3FF;
220 if (isTiny && roundBits)
226 zSig = (zSig + roundIncrement) >> 10;
227 zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
248 shiftCount = countLeadingZeros64 (zSig) - 1;
270 absA = zSign ? -a : a;
271 shiftCount = countLeadingZeros32 (absA) + 21;
273 return packFloat64 (zSign, 0x432 - shiftCount, zSig << shiftCount);
288 int16 aExp, bExp, zExp;
296 expDiff = aExp - bExp;
304 return propagateFloat64NaN (a, b);
310 bSig |=
LIT64 (0x2000000000000000);
311 shift64RightJamming (bSig, expDiff, &bSig);
314 else if (expDiff < 0)
319 return propagateFloat64NaN (a, b);
326 aSig |=
LIT64 (0x2000000000000000);
328 shift64RightJamming (aSig, -expDiff, &aSig);
336 return propagateFloat64NaN (a, b);
341 zSig =
LIT64 (0x4000000000000000) + aSig + bSig;
345 aSig |=
LIT64 (0x2000000000000000);
346 zSig = (aSig + bSig) << 1;
369 int16 aExp, bExp, zExp;
377 expDiff = aExp - bExp;
387 return propagateFloat64NaN (a, b);
389 return float64_default_nan;
405 return propagateFloat64NaN (a, b);
411 aSig |=
LIT64 (0x4000000000000000);
412 shift64RightJamming (aSig, -expDiff, &aSig);
413 bSig |=
LIT64 (0x4000000000000000);
418 goto normalizeRoundAndPack;
423 return propagateFloat64NaN (a, b);
429 bSig |=
LIT64 (0x4000000000000000);
430 shift64RightJamming (bSig, expDiff, &bSig);
431 aSig |=
LIT64 (0x4000000000000000);
435 normalizeRoundAndPack:
470 flag aSign, bSign, zSign;
471 int16 aExp, bExp, zExp;
472 bits64 aSig, bSig, zSig0, zSig1;
480 zSign = aSign ^ bSign;
483 if (aSig || ((bExp == 0x7FF) && bSig))
484 return propagateFloat64NaN (a, b);
485 if ((bExp | bSig) == 0)
488 return float64_default_nan;
495 return propagateFloat64NaN (a, b);
496 if ((aExp | aSig) == 0)
499 return float64_default_nan;
515 zExp = aExp + bExp - 0x3FF;
516 aSig = (aSig |
LIT64 (0x0010000000000000)) << 10;
517 bSig = (bSig |
LIT64 (0x0010000000000000)) << 11;
518 mul64To128 (aSig, bSig, &zSig0, &zSig1);
519 zSig0 |= (zSig1 != 0);
520 if (0 <= (
sbits64) (zSig0 << 1))
538 flag aSign, bSign, zSign;
539 int16 aExp, bExp, zExp;
541 bits64 rem0, rem1, term0, term1;
549 zSign = aSign ^ bSign;
553 return propagateFloat64NaN (a, b);
557 return propagateFloat64NaN (a, b);
559 return float64_default_nan;
566 return propagateFloat64NaN (a, b);
573 if ((aExp | aSig) == 0)
576 return float64_default_nan;
589 zExp = aExp - bExp + 0x3FD;
590 aSig = (aSig |
LIT64 (0x0010000000000000)) << 10;
591 bSig = (bSig |
LIT64 (0x0010000000000000)) << 11;
592 if (bSig <= (aSig + aSig))
597 zSig = estimateDiv128To64 (aSig, 0, bSig);
598 if ((zSig & 0x1FF) <= 2)
600 mul64To128 (bSig, zSig, &term0, &term1);
601 sub128 (aSig, 0, term0, term1, &rem0, &rem1);
605 add128 (rem0, rem1, 0, bSig, &rem0, &rem1);
634 return aSign || ((
bits64) ((a | b) << 1) == 0);
635 return (a == b) || (aSign ^ (a < b));
649 return (((~x) & 0x8000000000000000ULL) | (x & 0x7fffffffffffffffULL));
float64 float64_div(float64 a, float64 b)
INLINE int16 extractFloat64Exp(float64 a)
flag float64_ge(float64 a, float64 b)
INLINE flag extractFloat64Sign(float64 a)
#define float_flag_divbyzero
float64 int32_to_float64(int32 a)
unsigned short int bits16
INLINE float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
static float64 addFloat64Sigs(float64 a, float64 b, flag zSign)
float64 float64_mul(float64 a, float64 b)
#define float_flag_invalid
static float64 subFloat64Sigs(float64 a, float64 b, flag zSign)
flag float64_le(float64 a, float64 b)
float64 float64_add(float64 a, float64 b)
#define float_flag_overflow
#define float_flag_underflow
#define float_tininess_before_rounding
static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
static void normalizeFloat64Subnormal(bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr)
unsigned long long float64
static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
float64 float64_neg(float64 x)
int8 float_exception_flags
#define float_round_nearest_even
x
Return the smallest n such that 2^n >= _x.
signed long long int sbits64
#define float_flag_inexact
#define float_round_to_zero
INLINE bits64 extractFloat64Frac(float64 a)
unsigned long long int bits64