#include "softfloat-macros"
#include "softfloat-specialize"
/* extractFloat64Frac: the fraction field is the low 52 bits. */
return a & LIT64 (0x000FFFFFFFFFFFFF);
/* extractFloat64Exp: the biased exponent occupies bits 62..52. */
return (a >> 52) & 0x7FF;
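For orientation, a minimal standalone sketch of the same field decomposition, assuming only <stdint.h> and a binary64 value already reinterpreted as a 64-bit integer (the helper names are illustrative, not SoftFloat's):

#include <stdint.h>

/* Split a raw binary64 pattern into sign (1 bit), biased exponent
   (11 bits), and fraction (52 bits), mirroring extractFloat64Sign,
   extractFloat64Exp, and extractFloat64Frac. */
static inline int      f64_sign (uint64_t a) { return (int) (a >> 63); }
static inline int      f64_exp  (uint64_t a) { return (int) ((a >> 52) & 0x7FF); }
static inline uint64_t f64_frac (uint64_t a) { return a & 0x000FFFFFFFFFFFFFULL; }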
/* normalizeFloat64Subnormal: shift a nonzero subnormal fraction left until
   its leading 1 reaches the implicit-bit position (bit 52). */
shiftCount = countLeadingZeros64 (aSig) - 11;
*zSigPtr = aSig << shiftCount;
*zExpPtr = 1 - shiftCount;
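A standalone illustration of that normalization, with GCC/Clang's __builtin_clzll standing in for countLeadingZeros64:

#include <stdint.h>
#include <stdio.h>

int main (void)
{
    uint64_t aSig = 1;                            /* smallest subnormal fraction */
    int shiftCount = __builtin_clzll (aSig) - 11; /* 63 - 11 = 52 */
    uint64_t zSig = aSig << shiftCount;           /* leading 1 now at bit 52 */
    int zExp = 1 - shiftCount;                    /* -51 */
    printf ("zExp=%d zSig=0x%016llx\n", zExp, (unsigned long long) zSig);
    return 0;
}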
/* packFloat64: reassemble sign, biased exponent, and fraction.  The sum
   (rather than an OR) is deliberate: a significand that carries out of
   bit 52 bumps the exponent, which is exactly what rounding needs. */
return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
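A quick round-trip check that packing inverts the extraction above (plain C, illustrative values):

#include <stdint.h>
#include <assert.h>

int main (void)
{
    uint64_t a = 0x3FF8000000000000ULL;      /* 1.5 as binary64 bits */
    uint64_t sign = a >> 63;
    uint64_t exp  = (a >> 52) & 0x7FF;
    uint64_t frac = a & 0x000FFFFFFFFFFFFFULL;
    assert ((sign << 63) + (exp << 52) + frac == a);
    return 0;
}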
/* roundAndPackFloat64: round a significand whose leading 1 is at bit 62
   (10 extra fraction bits) down to 52 bits, then pack.  The unsigned cast
   below makes negative zExp look huge, so one branch handles both ends. */
flag roundNearestEven, isTiny;
int16 roundIncrement, roundBits;
...
roundIncrement = 0x200;                /* half a unit in the last place */
if (!roundNearestEven)
  {
    ...
    roundIncrement = 0x3FF;            /* full increment: directed rounding */
    ...
  }
roundBits = zSig & 0x3FF;
if (0x7FD <= (bits16) zExp)
  {
    if ((0x7FD < zExp)
        || ((zExp == 0x7FD)
            && ((sbits64) (zSig + roundIncrement) < 0)))
      /* Overflow: infinity, or the largest finite value when rounding toward zero. */
      return packFloat64 (zSign, 0x7FF, 0) - (roundIncrement == 0);
    ...
    isTiny = (float_detect_tininess == float_tininess_before_rounding)
      || (zExp < -1)
      || (zSig + roundIncrement < LIT64 (0x8000000000000000));
    shift64RightJamming (zSig, -zExp, &zSig);
    ...
    roundBits = zSig & 0x3FF;
    if (isTiny && roundBits)
      float_exception_flags |= float_flag_underflow;
  }
...
zSig = (zSig + roundIncrement) >> 10;
/* On an exact tie (roundBits == 0x200), clear the low bit: ties-to-even. */
zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
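A worked instance of that ties-to-even step in isolation (plain C; the value is chosen so the 10 discarded bits are exactly half, 0x200):

#include <stdint.h>
#include <stdio.h>

int main (void)
{
    uint64_t zSig = (0x123456789ABCDULL << 10) | 0x200; /* odd significand, tie */
    int roundBits = zSig & 0x3FF;
    uint64_t z = (zSig + 0x200) >> 10;                  /* rounds up to ...ABCE */
    z &= ~(uint64_t) ((roundBits ^ 0x200) == 0);        /* force the result even */
    printf ("0x%013llx\n", (unsigned long long) z);     /* 0x123456789abce */
    return 0;
}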
/* normalizeRoundAndPackFloat64: put the leading 1 at bit 62 for rounding. */
shiftCount = countLeadingZeros64 (zSig) - 1;
/* int32_to_float64: every int32 is exactly representable.  The leading 1
   lands on bit 52 and carries into the exponent via packFloat64's sum. */
absA = zSign ? -a : a;
shiftCount = countLeadingZeros32 (absA) + 21;
zSig = absA;
return packFloat64 (zSign, 0x432 - shiftCount, zSig << shiftCount);
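A standalone check of the conversion, using GCC/Clang's __builtin_clz for countLeadingZeros32 and memcpy to view the packed bits as a double:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

int main (void)
{
    uint32_t absA = 5;                          /* |a| for a = 5; leading 1 at bit 2 */
    int shiftCount = __builtin_clz (absA) + 21; /* 29 + 21 = 50 */
    uint64_t bits = ((uint64_t) (0x432 - shiftCount) << 52)
                    + ((uint64_t) absA << shiftCount);
    double d;
    memcpy (&d, &bits, sizeof d);
    printf ("%g\n", d);                         /* prints 5 */
    return 0;
}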
/* addFloat64Sigs: add the magnitudes of a and b under the sign zSign.
   Significands are pre-shifted left 9, so the implicit leading 1 sits
   at bit 61, i.e. 0x2000000000000000. */
int16 aExp, bExp, zExp;
...
expDiff = aExp - bExp;
if (0 < expDiff)
  {
    if (aExp == 0x7FF && aSig)
      return propagateFloat64NaN (a, b);
    ...
    bSig |= LIT64 (0x2000000000000000);         /* b's implicit bit, explicit */
    shift64RightJamming (bSig, expDiff, &bSig); /* align b to a's exponent */
    ...
  }
else if (expDiff < 0)
  {
    if (bExp == 0x7FF && bSig)
      return propagateFloat64NaN (a, b);
    ...
    aSig |= LIT64 (0x2000000000000000);
    shift64RightJamming (aSig, -expDiff, &aSig);
    ...
  }
else
  {
    if (aExp == 0x7FF && (aSig | bSig))
      return propagateFloat64NaN (a, b);
    ...
    /* Equal exponents: the two implicit bits sum to 0x4000000000000000. */
    zSig = LIT64 (0x4000000000000000) + aSig + bSig;
    ...
  }
aSig |= LIT64 (0x2000000000000000);
zSig = (aSig + bSig) << 1;
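shift64RightJamming, used throughout these routines, is not shown in this fragment; a minimal sketch with the usual SoftFloat semantics (any 1 bits shifted out are OR-ed into the result's low bit, so later rounding still sees them):

#include <stdint.h>

static void shift64_right_jamming (uint64_t a, int count, uint64_t *z)
{
    if (count == 0)
        *z = a;
    else if (count < 64)
        *z = (a >> count) | ((a << (64 - count)) != 0); /* "jam" lost bits */
    else
        *z = (a != 0);                                  /* all bits shifted out */
}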
/* subFloat64Sigs: form |a| - |b| (or its negation) under the sign zSign.
   Here the significands are pre-shifted left 10 bits, putting the
   implicit bit at bit 62, i.e. 0x4000000000000000. */
int16 aExp, bExp, zExp;
...
expDiff = aExp - bExp;
...
/* Equal exponents with aExp == 0x7FF: NaNs propagate, and Inf - Inf is
   invalid (float_flag_invalid), yielding the default NaN. */
if (aSig | bSig)
  return propagateFloat64NaN (a, b);
...
return float64_default_nan;
...
/* b's exponent is bigger: align a downward, then subtract a from b. */
if (bSig)
  return propagateFloat64NaN (a, b);
...
aSig |= LIT64 (0x4000000000000000);
shift64RightJamming (aSig, -expDiff, &aSig);
bSig |= LIT64 (0x4000000000000000);
...
goto normalizeRoundAndPack;
/* a's exponent is bigger: the mirror image. */
...
return propagateFloat64NaN (a, b);
...
bSig |= LIT64 (0x4000000000000000);
shift64RightJamming (bSig, expDiff, &bSig);
aSig |= LIT64 (0x4000000000000000);
...
normalizeRoundAndPack:
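The trailing normalize step matters because subtracting nearly equal operands cancels leading bits; a standalone illustration (GCC/Clang's __builtin_clzll again standing in for countLeadingZeros64):

#include <stdint.h>
#include <stdio.h>

int main (void)
{
    uint64_t aSig = 0x4000000000000000ULL;  /* 1.0 in the working format */
    uint64_t bSig = 0x3FFFFFFFFFFFFFFFULL;  /* the next value below it */
    uint64_t zSig = aSig - bSig;            /* == 1: 62 bits cancelled */
    int shiftCount = __builtin_clzll (zSig) - 1;
    printf ("shift left by %d -> 0x%016llx\n", shiftCount,
            (unsigned long long) (zSig << shiftCount));
    return 0;
}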
/* float64_mul: the product's sign is the XOR of the operand signs, and
   the biased exponents add, so one bias (0x3FF) is subtracted back out. */
flag aSign, bSign, zSign;
int16 aExp, bExp, zExp;
bits64 aSig, bSig, zSig0, zSig1;
...
zSign = aSign ^ bSign;
/* aExp == 0x7FF (a is Inf or NaN): */
if (aSig || ((bExp == 0x7FF) && bSig))
  return propagateFloat64NaN (a, b);
if ((bExp | bSig) == 0)
  /* Inf * 0 raises float_flag_invalid. */
  return float64_default_nan;
...
/* bExp == 0x7FF with bSig nonzero: */
return propagateFloat64NaN (a, b);
if ((aExp | aSig) == 0)
  /* 0 * Inf, the mirror case. */
  return float64_default_nan;
...
zExp = aExp + bExp - 0x3FF;
/* Restore the implicit bits; the 10- and 11-bit shifts position the
   106-bit product so its leading 1 lands at bit 61 or 62 of the high
   word, where roundAndPackFloat64 expects it. */
aSig = (aSig | LIT64 (0x0010000000000000)) << 10;
bSig = (bSig | LIT64 (0x0010000000000000)) << 11;
mul64To128 (aSig, bSig, &zSig0, &zSig1);
zSig0 |= (zSig1 != 0);           /* fold the low half into a sticky bit */
if (0 <= (sbits64) (zSig0 << 1)) /* leading 1 at bit 61: normalize up */
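mul64To128 comes from softfloat-macros; a compact stand-in with the same contract (z0 receives the high 64 bits, z1 the low), using GCC/Clang's unsigned __int128 where the original composes 32-bit partial products for portability:

#include <stdint.h>

static void mul64to128 (uint64_t a, uint64_t b, uint64_t *z0, uint64_t *z1)
{
    unsigned __int128 z = (unsigned __int128) a * b;
    *z0 = (uint64_t) (z >> 64);
    *z1 = (uint64_t) z;
}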
/* float64_div: the biased exponents subtract, so the bias is added back;
   0x3FD rather than 0x3FF pre-compensates for the fixed-point scaling of
   the 128-by-64-bit quotient below. */
flag aSign, bSign, zSign;
int16 aExp, bExp, zExp;
bits64 rem0, rem1, term0, term1;
...
zSign = aSign ^ bSign;
/* NaNs propagate; Inf / Inf raises float_flag_invalid: */
return propagateFloat64NaN (a, b);
...
return propagateFloat64NaN (a, b);
...
return float64_default_nan;
...
return propagateFloat64NaN (a, b);
...
if ((aExp | aSig) == 0)
  /* 0 / 0 is likewise invalid (nonzero / 0 raises float_flag_divbyzero). */
  return float64_default_nan;
...
zExp = aExp - bExp + 0x3FD;
aSig = (aSig | LIT64 (0x0010000000000000)) << 10;
bSig = (bSig | LIT64 (0x0010000000000000)) << 11;
if (bSig <= (aSig + aSig))
  /* aSig is halved and zExp bumped so the quotient stays below 2.0. */
  ...
zSig = estimateDiv128To64 (aSig, 0, bSig);
if ((zSig & 0x1FF) <= 2)
  {
    /* The estimate can be slightly high; multiply back and correct. */
    mul64To128 (bSig, zSig, &term0, &term1);
    sub128 (aSig, 0, term0, term1, &rem0, &rem1);
    /* While the remainder is negative, decrement zSig and add bSig back: */
    add128 (rem0, rem1, 0, bSig, &rem0, &rem1);
    ...
  }
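estimateDiv128To64 is also external to this fragment; a stand-in honoring the same contract (an approximation, equal to the true 128-by-64 quotient or slightly above it), here computed exactly with unsigned __int128:

#include <stdint.h>

static uint64_t estimate_div128to64 (uint64_t a0, uint64_t a1, uint64_t b)
{
    if (b <= a0)                          /* quotient would not fit in 64 bits */
        return 0xFFFFFFFFFFFFFFFFULL;
    return (uint64_t) ((((unsigned __int128) a0 << 64) | a1) / b);
}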
/* float64_le (raw bit patterns): with mixed signs, a <= b exactly when a
   is negative or both operands are zeros (+0 == -0): */
if (aSign != bSign)
  return aSign || ((bits64) ((a | b) << 1) == 0);
/* Equal signs: unsigned order reverses for negatives, hence the XOR. */
return (a == b) || (aSign ^ (a < b));
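A standalone check of the same ordering trick (plain C; values chosen to exercise the negative branch):

#include <stdint.h>
#include <string.h>
#include <assert.h>

static uint64_t bits_of (double d)
{
    uint64_t u;
    memcpy (&u, &d, sizeof u);
    return u;
}

int main (void)
{
    uint64_t a = bits_of (-2.0), b = bits_of (-1.0);
    /* Both negative: the numerically smaller value has the LARGER
       unsigned bit pattern, so the sense of < is flipped. */
    assert (a > b);
    assert ((a == b) || (1 ^ (a < b)));   /* -2.0 <= -1.0 */
    return 0;
}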
/* float64_neg: flip the sign bit; exponent and fraction are untouched. */
return ((~x) & 0x8000000000000000ULL) | (x & 0x7fffffffffffffffULL);
/* Symbol index for this file: types, status globals and macros, internal
   helpers, and public routines. */

unsigned short int bits16
unsigned long long int bits64
signed long long int sbits64
unsigned long long float64

int8 float_exception_flags

#define float_flag_invalid
#define float_flag_divbyzero
#define float_flag_overflow
#define float_flag_underflow
#define float_flag_inexact
#define float_round_nearest_even
#define float_round_to_zero
#define float_tininess_before_rounding

INLINE bits64 extractFloat64Frac(float64 a)
INLINE int16 extractFloat64Exp(float64 a)
INLINE flag extractFloat64Sign(float64 a)
INLINE float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
static void normalizeFloat64Subnormal(bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr)
static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
static float64 addFloat64Sigs(float64 a, float64 b, flag zSign)
static float64 subFloat64Sigs(float64 a, float64 b, flag zSign)
float64 int32_to_float64(int32 a)
float64 float64_add(float64 a, float64 b)
float64 float64_mul(float64 a, float64 b)
float64 float64_div(float64 a, float64 b)
flag float64_le(float64 a, float64 b)
flag float64_ge(float64 a, float64 b)
float64 float64_neg(float64 x)