exactfloat.cc
1 // Copyright 2009 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS-IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 
16 // Author: ericv@google.com (Eric Veach)
17 
18 #include "util/math/exactfloat/exactfloat.h"
19 
20 #include <cstdio>
21 #include <cstdlib>
22 #include <cstring>
23 #include <algorithm>
24 #include <cmath>
25 #include <limits>
26 
27 #include <openssl/bn.h>
28 #include <openssl/crypto.h> // for OPENSSL_free
29 
30 #include "base/logging.h"
31 #include "base/integral_types.h"
32 
33 using std::max;
34 using std::min;
35 
36 // Define storage for constants.
37 const int ExactFloat::kMinExp;
38 const int ExactFloat::kMaxExp;
39 const int ExactFloat::kMaxPrec;
40 const int32 ExactFloat::kExpNaN;
41 const int32 ExactFloat::kExpInfinity;
42 const int32 ExactFloat::kExpZero;
43 const int ExactFloat::kDoubleMantissaBits;
44 
45 // To simplify the overflow/underflow logic, we limit the exponent and
46 // precision range so that (2 * bn_exp_) does not overflow an "int". We take
47 // advantage of this, for example, by only checking for overflow/underflow
48 // *after* multiplying two numbers.
49 static_assert(
50  ExactFloat::kMaxExp <= INT_MAX / 2 &&
51  ExactFloat::kMinExp - ExactFloat::kMaxPrec >= INT_MIN / 2,
52  "exactfloat exponent might overflow");
53 
54 // We define a few simple extensions to the OpenSSL's BIGNUM interface.
55 // In some cases these depend on BIGNUM internal fields, so they might
56 // require tweaking if the BIGNUM implementation changes significantly.
57 // These are just thin wrappers for BoringSSL.
58 
59 #ifdef OPENSSL_IS_BORINGSSL
60 
61 inline static void BN_ext_set_uint64(BIGNUM* bn, uint64 v) {
62  CHECK(BN_set_u64(bn, v));
63 }
64 
65 // Return the absolute value of a BIGNUM as a 64-bit unsigned integer.
66 // Requires that BIGNUM fits into 64 bits.
67 inline static uint64 BN_ext_get_uint64(const BIGNUM* bn) {
68  uint64_t u64;
69  if (!BN_get_u64(bn, &u64)) {
70  DCHECK(false) << "BN has " << BN_num_bits(bn) << " bits";
71  return 0;
72  }
73  return u64;
74 }
75 
76 static int BN_ext_count_low_zero_bits(const BIGNUM* bn) {
77  return BN_count_low_zero_bits(bn);
78 }
79 
80 #else // !defined(OPENSSL_IS_BORINGSSL)
81 
82 // Set a BIGNUM to the given unsigned 64-bit value.
83 inline static void BN_ext_set_uint64(BIGNUM* bn, uint64 v) {
84 #if BN_BITS2 == 64
85  CHECK(BN_set_word(bn, v));
86 #else
87  static_assert(BN_BITS2 == 32, "at least 32 bit openssl build needed");
88  CHECK(BN_set_word(bn, static_cast<uint32>(v >> 32)));
89  CHECK(BN_lshift(bn, bn, 32));
90  CHECK(BN_add_word(bn, static_cast<uint32>(v)));
91 #endif
92 }
93 
94 // Return the absolute value of a BIGNUM as a 64-bit unsigned integer.
95 // Requires that BIGNUM fits into 64 bits.
96 inline static uint64 BN_ext_get_uint64(const BIGNUM* bn) {
97  DCHECK_LE(BN_num_bytes(bn), sizeof(uint64));
98 #if BN_BITS2 == 64
99  return BN_get_word(bn);
100 #else
101  static_assert(BN_BITS2 == 32, "at least 32 bit openssl build needed");
102  if (bn->top == 0) return 0;
103  if (bn->top == 1) return BN_get_word(bn);
104  DCHECK_EQ(bn->top, 2);
105  return (static_cast<uint64>(bn->d[1]) << 32) + bn->d[0];
106 #endif
107 }
108 
109 #if OPENSSL_VERSION_NUMBER < 0x10100000L
110 
111 // Count the number of low-order zero bits in the given BIGNUM (ignoring its
112 // sign). Returns 0 if the argument is zero.
113 static int BN_ext_count_low_zero_bits(const BIGNUM* bn) {
114  int count = 0;
115  for (int i = 0; i < bn->top; ++i) {
116  BN_ULONG w = bn->d[i];
117  if (w == 0) {
118  count += 8 * sizeof(BN_ULONG);
119  } else {
120  for (; (w & 1) == 0; w >>= 1) {
121  ++count;
122  }
123  break;
124  }
125  }
126  return count;
127 }
128 
129 #else // OPENSSL_VERSION_NUMBER >= 0x10100000L
130 
131 static int BN_ext_count_low_zero_bits(const BIGNUM* bn) {
132  // In OpenSSL >= 1.1, BIGNUM is an opaque type, so d and top
133  // cannot be accessed. The bytes must be copied out at a ~25%
134  // performance penalty.
135  std::vector<unsigned char> bytes(BN_num_bytes(bn));
136  // "le" indicates little endian.
137  size_t ret = BN_bn2lebinpad(bn, bytes.data(), bytes.size());
138  DCHECK_EQ(ret, bytes.size());
139 
140  int count = 0;
141  for (unsigned char c : bytes) {
142  if (c == 0) {
143  count += 8;
144  } else {
145  for (; (c & 1) == 0; c >>= 1) {
146  ++count;
147  }
148  break;
149  }
150  }
151  return count;
152 }
153 
154 #endif // OPENSSL_VERSION_NUMBER >= 0x10100000L
155 
156 #endif // !defined(OPENSSL_IS_BORINGSSL)
157 
158 ExactFloat::ExactFloat(double v) {
159  sign_ = std::signbit(v) ? -1 : 1;
160  if (std::isnan(v)) {
161  set_nan();
162  } else if (std::isinf(v)) {
163  set_inf(sign_);
164  } else {
165  // The following code is much simpler than messing about with bit masks,
166  // has the advantage of handling denormalized numbers and zero correctly,
167  // and is actually quite efficient (at least compared to the rest of this
168  // code). "f" is a fraction in the range [0.5, 1), so if we shift it left
169  // by the number of mantissa bits in a double (53, including the leading
170  // "1") then the result is always an integer.
171  int exp;
172  double f = frexp(fabs(v), &exp);
173  uint64 m = static_cast<uint64>(ldexp(f, kDoubleMantissaBits));
174  BN_ext_set_uint64(bn_.get(), m);
175  bn_exp_ = exp - kDoubleMantissaBits;
176  Canonicalize();
177  }
178 }
179 
180 ExactFloat::ExactFloat(int v) {
181  sign_ = (v >= 0) ? 1 : -1;
182  // Note that this works even for INT_MIN because the parameter type for
183  // BN_set_word() is unsigned.
184  CHECK(BN_set_word(bn_.get(), abs(v)));
185  bn_exp_ = 0;
186  Canonicalize();
187 }
188 
189 ExactFloat::ExactFloat(const ExactFloat& b)
190  : sign_(b.sign_),
191  bn_exp_(b.bn_exp_) {
192  BN_copy(bn_.get(), b.bn_.get());
193 }
194 
195 ExactFloat ExactFloat::SignedZero(int sign) {
196  ExactFloat r;
197  r.set_zero(sign);
198  return r;
199 }
200 
201 ExactFloat ExactFloat::Infinity(int sign) {
202  ExactFloat r;
203  r.set_inf(sign);
204  return r;
205 }
206 
207 ExactFloat ExactFloat::NaN() {
208  ExactFloat r;
209  r.set_nan();
210  return r;
211 }
212 
213 int ExactFloat::prec() const {
214  return BN_num_bits(bn_.get());
215 }
216 
217 int ExactFloat::exp() const {
218  DCHECK(is_normal());
219  return bn_exp_ + BN_num_bits(bn_.get());
220 }
221 
222 void ExactFloat::set_zero(int sign) {
223  sign_ = sign;
224  bn_exp_ = kExpZero;
225  if (!BN_is_zero(bn_.get())) BN_zero(bn_.get());
226 }
227 
228 void ExactFloat::set_inf(int sign) {
229  sign_ = sign;
230  bn_exp_ = kExpInfinity;
231  if (!BN_is_zero(bn_.get())) BN_zero(bn_.get());
232 }
233 
234 void ExactFloat::set_nan() {
235  sign_ = 1;
236  bn_exp_ = kExpNaN;
237  if (!BN_is_zero(bn_.get())) BN_zero(bn_.get());
238 }
239 
240 double ExactFloat::ToDouble() const {
241  // If the mantissa has too many bits, we need to round it.
242  if (prec() <= kDoubleMantissaBits) {
243  return ToDoubleHelper();
244  } else {
245  ExactFloat r = RoundToMaxPrec(kDoubleMantissaBits, kRoundTiesToEven);
246  return r.ToDoubleHelper();
247  }
248 }
249 
250 double ExactFloat::ToDoubleHelper() const {
251  size_t ret = BN_num_bits(bn_.get());
252  DCHECK_LE(ret, kDoubleMantissaBits);
253  if (!is_normal()) {
254  if (is_zero()) return copysign(0, sign_);
255  if (is_inf()) return copysign(INFINITY, sign_);
256  return copysign(NAN, sign_);
257  }
258  uint64 d_mantissa = BN_ext_get_uint64(bn_.get());
259  // We rely on ldexp() to handle overflow and underflow. (It will return a
260  // signed zero or infinity if the result is too small or too large.)
261  return sign_ * ldexp(static_cast<double>(d_mantissa), bn_exp_);
262 }
263 
264 ExactFloat ExactFloat::RoundToMaxPrec(int max_prec, RoundingMode mode) const {
265  // The "kRoundTiesToEven" mode requires at least 2 bits of precision
266  // (otherwise both adjacent representable values may be odd).
267  DCHECK_GE(max_prec, 2);
268  DCHECK_LE(max_prec, kMaxPrec);
269 
270  // The following test also catches zero, infinity, and NaN.
271  int shift = prec() - max_prec;
272  if (shift <= 0) return *this;
273 
274  // Round by removing the appropriate number of bits from the mantissa. Note
275  // that if the value is rounded up to a power of 2, the high-order bit
276  // position may increase, but in that case Canonicalize() will remove at
277  // least one zero bit and so the output will still have prec() <= max_prec.
278  return RoundToPowerOf2(bn_exp_ + shift, mode);
279 }
280 
281 ExactFloat ExactFloat::RoundToPowerOf2(int bit_exp, RoundingMode mode) const {
282  DCHECK_GE(bit_exp, kMinExp - kMaxPrec);
283  DCHECK_LE(bit_exp, kMaxExp);
284 
285  // If the exponent is already large enough, or the value is zero, infinity,
286  // or NaN, then there is nothing to do.
287  int shift = bit_exp - bn_exp_;
288  if (shift <= 0) return *this;
289  DCHECK(is_normal());
290 
291  // Convert rounding up/down to toward/away from zero, so that we don't need
292  // to consider the sign of the number from this point onward.
293  if (mode == kRoundTowardPositive) {
294  mode = (sign_ > 0) ? kRoundAwayFromZero : kRoundTowardZero;
295  } else if (mode == kRoundTowardNegative) {
296  mode = (sign_ > 0) ? kRoundTowardZero : kRoundAwayFromZero;
297  }
298 
299  // Rounding consists of right-shifting the mantissa by "shift", and then
300  // possibly incrementing the result (depending on the rounding mode, the
301  // bits that were discarded, and sometimes the lowest kept bit). The
302  // following code figures out whether we need to increment.
303  ExactFloat r;
304  bool increment = false;
305  if (mode == kRoundTowardZero) {
306  // Never increment.
307  } else if (mode == kRoundTiesAwayFromZero) {
308  // Increment if the highest discarded bit is 1.
309  if (BN_is_bit_set(bn_.get(), shift - 1))
310  increment = true;
311  } else if (mode == kRoundAwayFromZero) {
312  // Increment unless all discarded bits are zero.
313  if (BN_ext_count_low_zero_bits(bn_.get()) < shift)
314  increment = true;
315  } else {
316  DCHECK_EQ(mode, kRoundTiesToEven);
317  // Let "w/xyz" denote a mantissa where "w" is the lowest kept bit and
318  // "xyz" are the discarded bits. Then using regexp notation:
319  // ./0.* -> Don't increment (fraction < 1/2)
320  // 0/10* -> Don't increment (fraction = 1/2, kept part even)
321  // 1/10* -> Increment (fraction = 1/2, kept part odd)
322  // ./1.*1.* -> Increment (fraction > 1/2)
323  if (BN_is_bit_set(bn_.get(), shift - 1) &&
324  ((BN_is_bit_set(bn_.get(), shift) ||
325  BN_ext_count_low_zero_bits(bn_.get()) < shift - 1))) {
326  increment = true;
327  }
328  }
329  r.bn_exp_ = bn_exp_ + shift;
330  CHECK(BN_rshift(r.bn_.get(), bn_.get(), shift));
331  if (increment) {
332  CHECK(BN_add_word(r.bn_.get(), 1));
333  }
334  r.sign_ = sign_;
335  r.Canonicalize();
336  return r;
337 }
338 
339 int ExactFloat::NumSignificantDigitsForPrec(int prec) {
340  // The simplest bound is
341  //
342  // d <= 1 + ceil(prec * log10(2))
343  //
344  // The following bound is tighter by 0.5 digits on average, but requires
345  // the exponent to be known as well:
346  //
347  // d <= ceil(exp * log10(2)) - floor((exp - prec) * log10(2))
348  //
349  // Since either of these bounds can be too large by 0, 1, or 2 digits, we
350  // stick with the simpler first bound.
351  return static_cast<int>(1 + ceil(prec * (M_LN2 / M_LN10)));
352 }
353 
354 // Numbers are always formatted with at least this many significant digits.
355 // This prevents small integers from being formatted in exponential notation
356 // (e.g. 1024 formatted as 1e+03), and also avoids the confusion of having
357 // supposedly "high precision" numbers formatted with just 1 or 2 digits
358 // (e.g. 1/512 == 0.001953125 formatted as 0.002).
359 static const int kMinSignificantDigits = 10;
360 
361 string ExactFloat::ToString() const {
362  int max_digits = max(kMinSignificantDigits,
363  NumSignificantDigitsForPrec(prec()));
364  return ToStringWithMaxDigits(max_digits);
365 }
366 
367 string ExactFloat::ToStringWithMaxDigits(int max_digits) const {
368  DCHECK_GT(max_digits, 0);
369  if (!is_normal()) {
370  if (is_nan()) return "nan";
371  if (is_zero()) return (sign_ < 0) ? "-0" : "0";
372  return (sign_ < 0) ? "-inf" : "inf";
373  }
374  string digits;
375  int exp10 = GetDecimalDigits(max_digits, &digits);
376  string str;
377  if (sign_ < 0) str.push_back('-');
378 
379  // We use the standard '%g' formatting rules. If the exponent is less than
380  // -4 or greater than or equal to the requested precision (i.e., max_digits)
381  // then we use exponential notation.
382  //
383  // But since "exp10" is the base-10 exponent corresponding to a mantissa in
384  // the range [0.1, 1), whereas the '%g' rules assume a mantissa in the range
385  // [1.0, 10), we need to adjust these parameters by 1.
386  if (exp10 <= -4 || exp10 > max_digits) {
387  // Use exponential format.
388  str.push_back(digits[0]);
389  if (digits.size() > 1) {
390  str.push_back('.');
391  str.append(digits.begin() + 1, digits.end());
392  }
393  char exp_buf[20];
394  sprintf(exp_buf, "e%+02d", exp10 - 1);
395  str += exp_buf;
396  } else {
397  // Use fixed format. We split this into two cases depending on whether
398  // the integer portion is non-zero or not.
399  if (exp10 > 0) {
400  if (unsigned(exp10) >= digits.size()) {
401  str += digits;
402  for (int i = exp10 - digits.size(); i > 0; --i) {
403  str.push_back('0');
404  }
405  } else {
406  str.append(digits.begin(), digits.begin() + exp10);
407  str.push_back('.');
408  str.append(digits.begin() + exp10, digits.end());
409  }
410  } else {
411  str += "0.";
412  for (int i = exp10; i < 0; ++i) {
413  str.push_back('0');
414  }
415  str += digits;
416  }
417  }
418  return str;
419 }
420 
421 // Increment an unsigned integer represented as a string of ASCII digits.
422 static void IncrementDecimalDigits(string* digits) {
423  string::iterator pos = digits->end();
424  while (--pos >= digits->begin()) {
425  if (*pos < '9') { ++*pos; return; }
426  *pos = '0';
427  }
428  digits->insert(0, "1");
429 }
430 
431 int ExactFloat::GetDecimalDigits(int max_digits, string* digits) const {
432  DCHECK(is_normal());
433  // Convert the value to the form (bn * (10 ** bn_exp10)) where "bn" is a
434  // positive integer (BIGNUM).
435  BIGNUM* bn = BN_new();
436  int bn_exp10;
437  if (bn_exp_ >= 0) {
438  // The easy case: bn = bn_ * (2 ** bn_exp_)), bn_exp10 = 0.
439  CHECK(BN_lshift(bn, bn_.get(), bn_exp_));
440  bn_exp10 = 0;
441  } else {
442  // Set bn = bn_ * (5 ** -bn_exp_) and bn_exp10 = bn_exp_. This is
443  // equivalent to the original value of (bn_ * (2 ** bn_exp_)).
444  BIGNUM* power = BN_new();
445  CHECK(BN_set_word(power, -bn_exp_));
446  CHECK(BN_set_word(bn, 5));
447  BN_CTX* ctx = BN_CTX_new();
448  CHECK(BN_exp(bn, bn, power, ctx));
449  CHECK(BN_mul(bn, bn, bn_.get(), ctx));
450  BN_CTX_free(ctx);
451  BN_free(power);
452  bn_exp10 = bn_exp_;
453  }
454  // Now convert "bn" to a decimal string.
455  char* all_digits = BN_bn2dec(bn);
456  DCHECK(all_digits != nullptr);
457  BN_free(bn);
458  // Check whether we have too many digits and round if necessary.
459  int num_digits = strlen(all_digits);
460  if (num_digits <= max_digits) {
461  *digits = all_digits;
462  } else {
463  digits->assign(all_digits, max_digits);
464  // Standard "printf" formatting rounds ties to an even number. This means
465  // that we round up (away from zero) if highest discarded digit is '5' or
466  // more, unless all other discarded digits are zero in which case we round
467  // up only if the lowest kept digit is odd.
468  if (all_digits[max_digits] >= '5' &&
469  ((all_digits[max_digits-1] & 1) == 1 ||
470  strpbrk(all_digits + max_digits + 1, "123456789") != nullptr)) {
471  // This can increase the number of digits by 1, but in that case at
472  // least one trailing zero will be stripped off below.
473  IncrementDecimalDigits(digits);
474  }
475  // Adjust the base-10 exponent to reflect the digits we have removed.
476  bn_exp10 += num_digits - max_digits;
477  }
478  OPENSSL_free(all_digits);
479 
480  // Now strip any trailing zeros.
481  DCHECK_NE((*digits)[0], '0');
482  string::iterator pos = digits->end();
483  while (pos[-1] == '0') --pos;
484  if (pos < digits->end()) {
485  bn_exp10 += digits->end() - pos;
486  digits->erase(pos, digits->end());
487  }
488  DCHECK_LE(digits->size(), max_digits);
489 
490  // Finally, we adjust the base-10 exponent so that the mantissa is a
491  // fraction in the range [0.1, 1) rather than an integer.
492  return bn_exp10 + digits->size();
493 }
494 
495 string ExactFloat::ToUniqueString() const {
496  char prec_buf[20];
497  sprintf(prec_buf, "<%d>", prec());
498  return ToString() + prec_buf;
499 }
500 
501 ExactFloat& ExactFloat::operator=(const ExactFloat& b) {
502  if (this != &b) {
503  sign_ = b.sign_;
504  bn_exp_ = b.bn_exp_;
505  BN_copy(bn_.get(), b.bn_.get());
506  }
507  return *this;
508 }
509 
510 ExactFloat ExactFloat::operator-() const {
511  return CopyWithSign(-sign_);
512 }
513 
514 ExactFloat operator+(const ExactFloat& a, const ExactFloat& b) {
515  return ExactFloat::SignedSum(a.sign_, &a, b.sign_, &b);
516 }
517 
518 ExactFloat operator-(const ExactFloat& a, const ExactFloat& b) {
519  return ExactFloat::SignedSum(a.sign_, &a, -b.sign_, &b);
520 }
521 
522 ExactFloat ExactFloat::SignedSum(int a_sign, const ExactFloat* a,
523  int b_sign, const ExactFloat* b) {
524  if (!a->is_normal() || !b->is_normal()) {
525  // Handle zero, infinity, and NaN according to IEEE 754-2008.
526  if (a->is_nan()) return *a;
527  if (b->is_nan()) return *b;
528  if (a->is_inf()) {
529  // Adding two infinities with opposite sign yields NaN.
530  if (b->is_inf() && a_sign != b_sign) return NaN();
531  return Infinity(a_sign);
532  }
533  if (b->is_inf()) return Infinity(b_sign);
534  if (a->is_zero()) {
535  if (!b->is_zero()) return b->CopyWithSign(b_sign);
536  // Adding two zeros with the same sign preserves the sign.
537  if (a_sign == b_sign) return SignedZero(a_sign);
538  // Adding two zeros of opposite sign produces +0.
539  return SignedZero(+1);
540  }
541  DCHECK(b->is_zero());
542  return a->CopyWithSign(a_sign);
543  }
544  // Swap the numbers if necessary so that "a" has the larger bn_exp_.
545  if (a->bn_exp_ < b->bn_exp_) {
546  using std::swap;
547  swap(a_sign, b_sign);
548  swap(a, b);
549  }
550  // Shift "a" if necessary so that both values have the same bn_exp_.
551  ExactFloat r;
552  if (a->bn_exp_ > b->bn_exp_) {
553  CHECK(BN_lshift(r.bn_.get(), a->bn_.get(), a->bn_exp_ - b->bn_exp_));
554  a = &r; // The only field of "a" used below is bn_.
555  }
556  r.bn_exp_ = b->bn_exp_;
557  if (a_sign == b_sign) {
558  CHECK(BN_add(r.bn_.get(), a->bn_.get(), b->bn_.get()));
559  r.sign_ = a_sign;
560  } else {
561  // Note that the BIGNUM documentation is out of date -- all methods now
562  // allow the result to be the same as any input argument, so it is okay if
563  // (a == &r) due to the shift above.
564  CHECK(BN_sub(r.bn_.get(), a->bn_.get(), b->bn_.get()));
565  if (BN_is_zero(r.bn_.get())) {
566  r.sign_ = +1;
567  } else if (BN_is_negative(r.bn_.get())) {
568  // The magnitude of "b" was larger.
569  r.sign_ = b_sign;
570  BN_set_negative(r.bn_.get(), false);
571  } else {
572  // They were equal, or the magnitude of "a" was larger.
573  r.sign_ = a_sign;
574  }
575  }
576  r.Canonicalize();
577  return r;
578 }
579 
580 void ExactFloat::Canonicalize() {
581  if (!is_normal()) return;
582 
583  // Underflow/overflow occurs if exp() is not in [kMinExp, kMaxExp].
584  // We also convert a zero mantissa to signed zero.
585  int my_exp = exp();
586  if (my_exp < kMinExp || BN_is_zero(bn_.get())) {
587  set_zero(sign_);
588  } else if (my_exp > kMaxExp) {
589  set_inf(sign_);
590  } else if (!BN_is_odd(bn_.get())) {
591  // Remove any low-order zero bits from the mantissa.
592  DCHECK(!BN_is_zero(bn_.get()));
593  int shift = BN_ext_count_low_zero_bits(bn_.get());
594  if (shift > 0) {
595  CHECK(BN_rshift(bn_.get(), bn_.get(), shift));
596  bn_exp_ += shift;
597  }
598  }
599  // If the mantissa has too many bits, we replace it by NaN to indicate
600  // that an inexact calculation has occurred.
601  if (prec() > kMaxPrec) {
602  set_nan();
603  }
604 }
605 
606 ExactFloat operator*(const ExactFloat& a, const ExactFloat& b) {
607  int result_sign = a.sign_ * b.sign_;
608  if (!a.is_normal() || !b.is_normal()) {
609  // Handle zero, infinity, and NaN according to IEEE 754-2008.
610  if (a.is_nan()) return a;
611  if (b.is_nan()) return b;
612  if (a.is_inf()) {
613  // Infinity times zero yields NaN.
614  if (b.is_zero()) return ExactFloat::NaN();
615  return ExactFloat::Infinity(result_sign);
616  }
617  if (b.is_inf()) {
618  if (a.is_zero()) return ExactFloat::NaN();
619  return ExactFloat::Infinity(result_sign);
620  }
621  DCHECK(a.is_zero() || b.is_zero());
622  return ExactFloat::SignedZero(result_sign);
623  }
624  ExactFloat r;
625  r.sign_ = result_sign;
626  r.bn_exp_ = a.bn_exp_ + b.bn_exp_;
627  BN_CTX* ctx = BN_CTX_new();
628  CHECK(BN_mul(r.bn_.get(), a.bn_.get(), b.bn_.get(), ctx));
629  BN_CTX_free(ctx);
630  r.Canonicalize();
631  return r;
632 }
633 
634 bool operator==(const ExactFloat& a, const ExactFloat& b) {
635  // NaN is not equal to anything, not even itself.
636  if (a.is_nan() || b.is_nan()) return false;
637 
638  // Since Canonicalize() strips low-order zero bits, all other cases
639  // (including non-normal values) require bn_exp_ to be equal.
640  if (a.bn_exp_ != b.bn_exp_) return false;
641 
642  // Positive and negative zero are equal.
643  if (a.is_zero() && b.is_zero()) return true;
644 
645  // Otherwise, the signs and mantissas must match. Note that non-normal
646  // values such as infinity have a mantissa of zero.
647  return a.sign_ == b.sign_ && BN_ucmp(a.bn_.get(), b.bn_.get()) == 0;
648 }
649 
650 int ExactFloat::ScaleAndCompare(const ExactFloat& b) const {
651  DCHECK(is_normal() && b.is_normal() && bn_exp_ >= b.bn_exp_);
652  ExactFloat tmp = *this;
653  CHECK(BN_lshift(tmp.bn_.get(), tmp.bn_.get(), bn_exp_ - b.bn_exp_));
654  return BN_ucmp(tmp.bn_.get(), b.bn_.get());
655 }
656 
657 bool ExactFloat::UnsignedLess(const ExactFloat& b) const {
658  // Handle the zero/infinity cases (NaN has already been done).
659  if (is_inf() || b.is_zero()) return false;
660  if (is_zero() || b.is_inf()) return true;
661  // If the high-order bit positions differ, we are done.
662  int cmp = exp() - b.exp();
663  if (cmp != 0) return cmp < 0;
664  // Otherwise shift one of the two values so that they both have the same
665  // bn_exp_ and then compare the mantissas.
666  return (bn_exp_ >= b.bn_exp_ ?
667  ScaleAndCompare(b) < 0 : b.ScaleAndCompare(*this) > 0);
668 }
669 
670 bool operator<(const ExactFloat& a, const ExactFloat& b) {
671  // NaN is unordered compared to everything, including itself.
672  if (a.is_nan() || b.is_nan()) return false;
673  // Positive and negative zero are equal.
674  if (a.is_zero() && b.is_zero()) return false;
675  // Otherwise, anything negative is less than anything positive.
676  if (a.sign_ != b.sign_) return a.sign_ < b.sign_;
677  // Now we just compare absolute values.
678  return (a.sign_ > 0) ? a.UnsignedLess(b) : b.UnsignedLess(a);
679 }
680 
681 ExactFloat fabs(const ExactFloat& a) {
682  return abs(a);
683 }
684 
685 ExactFloat abs(const ExactFloat& a) {
686  return a.CopyWithSign(+1);
687 }
688 
689 ExactFloat fmax(const ExactFloat& a, const ExactFloat& b) {
690  // If one argument is NaN, return the other argument.
691  if (a.is_nan()) return b;
692  if (b.is_nan()) return a;
693  // Not required by IEEE 754, but we prefer +0 over -0.
694  if (a.sign_ != b.sign_) {
695  return (a.sign_ < b.sign_) ? b : a;
696  }
697  return (a < b) ? b : a;
698 }
699 
700 ExactFloat fmin(const ExactFloat& a, const ExactFloat& b) {
701  // If one argument is NaN, return the other argument.
702  if (a.is_nan()) return b;
703  if (b.is_nan()) return a;
704  // Not required by IEEE 754, but we prefer -0 over +0.
705  if (a.sign_ != b.sign_) {
706  return (a.sign_ < b.sign_) ? a : b;
707  }
708  return (a < b) ? a : b;
709 }
710 
711 ExactFloat fdim(const ExactFloat& a, const ExactFloat& b) {
712  // This formulation has the correct behavior for NaNs.
713  return (a <= b) ? 0 : (a - b);
714 }
715 
716 ExactFloat ceil(const ExactFloat& a) {
717  return a.RoundToPowerOf2(0, ExactFloat::kRoundTowardPositive);
718 }
719 
720 ExactFloat floor(const ExactFloat& a) {
721  return a.RoundToPowerOf2(0, ExactFloat::kRoundTowardNegative);
722 }
723 
724 ExactFloat trunc(const ExactFloat& a) {
725  return a.RoundToPowerOf2(0, ExactFloat::kRoundTowardZero);
726 }
727 
728 ExactFloat round(const ExactFloat& a) {
729  return a.RoundToPowerOf2(0, ExactFloat::kRoundTiesAwayFromZero);
730 }
731 
732 ExactFloat rint(const ExactFloat& a) {
733  return a.RoundToPowerOf2(0, ExactFloat::kRoundTiesToEven);
734 }
735 
736 template <class T>
737 T ExactFloat::ToInteger(RoundingMode mode) const {
738  using std::numeric_limits;
739  static_assert(sizeof(T) <= sizeof(uint64), "max 64 bits supported");
740  static_assert(numeric_limits<T>::is_signed, "only signed types supported");
741  const int64 kMinValue = numeric_limits<T>::min();
742  const int64 kMaxValue = numeric_limits<T>::max();
743 
744  ExactFloat r = RoundToPowerOf2(0, mode);
745  if (r.is_nan()) return kMaxValue;
746  if (r.is_zero()) return 0;
747  if (!r.is_inf()) {
748  // If the unsigned value has more than 63 bits it is always clamped.
749  if (r.exp() < 64) {
750  int64 value = BN_ext_get_uint64(r.bn_.get()) << r.bn_exp_;
751  if (r.sign_ < 0) value = -value;
752  return max(kMinValue, min(kMaxValue, value));
753  }
754  }
755  return (r.sign_ < 0) ? kMinValue : kMaxValue;
756 }
757 
758 long lrint(const ExactFloat& a) {
759  return a.ToInteger<long>(ExactFloat::kRoundTiesToEven);
760 }
761 
762 long long llrint(const ExactFloat& a) {
763  return a.ToInteger<long long>(ExactFloat::kRoundTiesToEven);
764 }
765 
766 long lround(const ExactFloat& a) {
767  return a.ToInteger<long>(ExactFloat::kRoundTiesAwayFromZero);
768 }
769 
770 long long llround(const ExactFloat& a) {
771  return a.ToInteger<long long>(ExactFloat::kRoundTiesAwayFromZero);
772 }
773 
774 ExactFloat copysign(const ExactFloat& a, const ExactFloat& b) {
775  return a.CopyWithSign(b.sign_);
776 }
777 
778 ExactFloat frexp(const ExactFloat& a, int* exp) {
779  if (!a.is_normal()) {
780  // If a == 0, exp should be zero. If a.is_inf() or a.is_nan(), exp is not
781  // defined but the glibc implementation returns zero.
782  *exp = 0;
783  return a;
784  }
785  *exp = a.exp();
786  return ldexp(a, -a.exp());
787 }
788 
789 ExactFloat ldexp(const ExactFloat& a, int exp) {
790  if (!a.is_normal()) return a;
791 
792  // To prevent integer overflow, we first clamp "exp" so that
793  // (kMinExp - 1) <= (a_exp + exp) <= (kMaxExp + 1).
794  int a_exp = a.exp();
795  exp = min(ExactFloat::kMaxExp + 1 - a_exp,
796  max(ExactFloat::kMinExp - 1 + a_exp, exp));
797 
798  // Now modify the exponent and check for overflow/underflow.
799  ExactFloat r = a;
800  r.bn_exp_ += exp;
801  r.Canonicalize();
802  return r;
803 }
804 
805 ExactFloat scalbln(const ExactFloat& a, long exp) {
806  // Clamp the exponent to the range of "int" in order to avoid truncation.
807  exp = max(static_cast<long>(INT_MIN), min(static_cast<long>(INT_MAX), exp));
808  return ldexp(a, exp);
809 }
810 
811 int ilogb(const ExactFloat& a) {
812  if (a.is_zero()) return FP_ILOGB0;
813  if (a.is_inf()) return INT_MAX;
814  if (a.is_nan()) return FP_ILOGBNAN;
815  // a.exp() assumes the significand is in the range [0.5, 1).
816  return a.exp() - 1;
817 }
818 
819 ExactFloat logb(const ExactFloat& a) {
820  if (a.is_zero()) return ExactFloat::Infinity(-1);
821  if (a.is_inf()) return ExactFloat::Infinity(+1); // Even if a < 0.
822  if (a.is_nan()) return a;
823  // exp() assumes the significand is in the range [0.5,1).
824  return ExactFloat(a.exp() - 1);
825 }
826 
827 ExactFloat ExactFloat::Unimplemented() {
828  LOG(FATAL) << "Unimplemented ExactFloat method called";
829  return NaN();
830 }