exactfloat.h
1 // Copyright 2009 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS-IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 
16 // Author: ericv@google.com (Eric Veach)
17 //
18 // ExactFloat is a multiple-precision floating point type based on the OpenSSL
19 // Bignum library. It has the same interface as the built-in "float" and
20 // "double" types, but only supports the subset of operators and intrinsics
21 // where it is possible to compute the result exactly. So for example,
22 // ExactFloat supports addition and multiplication but not division (since in
23 // general, the quotient of two floating-point numbers cannot be represented
24 // exactly). Exact arithmetic is useful for geometric algorithms, especially
25 // for disambiguating cases where ordinary double-precision arithmetic yields
26 // an uncertain result.
27 //
28 // ExactFloat is a subset of the faster and more capable MPFloat class (which
29 // is based on the GNU MPFR library). The main reason to use this class
30 // rather than MPFloat is that it is subject to a BSD-style license rather
31 // than the much more restrictive LGPL license.
32 //
33 // It has the following features:
34 //
35 // - ExactFloat uses the same syntax as the built-in "float" and "double"
36 // types, for example: x += 4 + fabs(2*y*y - z*z). There are a few
37 // differences (see below), but the syntax is compatible enough so that
38 // ExactFloat can be used as a template argument to templatized classes
39 // such as Vector2, VectorN, Matrix3x3, etc.
40 //
41 // - Results are not rounded; instead, precision is increased so that the
42 // result can be represented exactly. An inexact result is returned only
43 // in the case of underflow or overflow (yielding signed zero or infinity
44 // respectively), or if the maximum allowed precision is exceeded (yielding
45 // NaN). ExactFloat uses IEEE 754-2008 rules for handling infinities, NaN,
46 // rounding to integers, etc.
47 //
48 // - ExactFloat only supports calculations where the result can be
49 // represented exactly. Therefore it supports intrinsics such as fabs()
50 // but not transcendentals such as sin(), sqrt(), etc.
51 //
52 // Syntax Compatibility with "float" and "double"
53 // ----------------------------------------------
54 //
55 // ExactFloat supports a subset of the operators and intrinsics for the
56 // built-in "double" type. (Thus it supports fabs() but not fabsf(), for
57 // example.) The syntax is different only in the following cases:
58 //
59 // - Casts and implicit conversions to built-in types (including "bool") are
60 // not supported. So for example, the following will not compile:
61 //
62 // ExactFloat x = 7.5;
63 // double y = x; // ERROR: use x.ToDouble() instead
64 // long z = x; // ERROR: use x.ToDouble() or lround(trunc(x))
65 // q = static_cast<int>(x); // ERROR: use x.ToDouble() or lround(trunc(x))
66 // if (x) { ... } // ERROR: use (x != 0) instead
67 //
68 // - The glibc floating-point classification macros (fpclassify, isfinite,
69 // isnormal, isnan, isinf) are not supported. Instead there are
70 // zero-argument methods:
71 //
72 // ExactFloat x;
73 // if (isnan(x)) { ... } // ERROR: use (x.is_nan()) instead
74 // if (isinf(x)) { ... } // ERROR: use (x.is_inf()) instead
75 //
76 // Using ExactFloat with Vector3, etc.
77 // -----------------------------------
78 //
79 // ExactFloat can be used with templatized classes such as Vector2 and Vector3
80 // (see "util/math/vector.h"), with the following limitations:
81 //
82 // - Cast() can be used to convert other vector types to an ExactFloat vector
83 // type, but not the other way around. This is because there are no
84 // implicit conversions from ExactFloat to built-in types. You can work
85 // around this by calling an explicit conversion method such as
86 // ToDouble(). For example:
87 //
88 // typedef Vector3<ExactFloat> Vector3_xf;
89 // Vector3_xf x;
90 // Vector3_d y;
91 // x = Vector3_xf::Cast(y); // This works.
92 // y = Vector3_d::Cast(x); // This doesn't.
93 // y = Vector3_d(x[0].ToDouble(), x[1].ToDouble(), x[2].ToDouble()); // OK
94 //
95 // - IsNaN() is not supported because it calls isnan(), which is defined as a
96 // macro in <math.h> and therefore can't easily be overridden.
97 //
98 // Precision Semantics
99 // -------------------
100 //
101 // Unlike MPFloat, ExactFloat does not allow a maximum precision to be
102 // specified (it is always unbounded). Therefore it does not have any of the
103 // corresponding constructors.
104 //
105 // The current precision of an ExactFloat (i.e., the number of bits in its
106 // mantissa) is returned by prec(). The precision is increased as necessary
107 // so that the result of every operation can be represented exactly.
108 
109 #ifndef S2_UTIL_MATH_EXACTFLOAT_EXACTFLOAT_H_
110 #define S2_UTIL_MATH_EXACTFLOAT_EXACTFLOAT_H_
111 
112 #include <algorithm>
113 #include <climits>
114 #include <cmath>
115 #include <algorithm>
116 #include <iostream>
117 #include <string>
118 using std::string;
119 
120 #include <openssl/bn.h>
121 
122 #include "base/logging.h"
123 #include "base/integral_types.h"
124 
125 class ExactFloat {
126  public:
127  // The following limits are imposed by OpenSSL.
128 
129  // The maximum exponent supported. If a value has an exponent larger than
130  // this, it is replaced by infinity (with the appropriate sign).
131  static const int kMaxExp = 200*1000*1000; // About 10**(60 million)
132 
133  // The minimum exponent supported. If a value has an exponent less than
134  // this, it is replaced by zero (with the appropriate sign).
135  static const int kMinExp = -kMaxExp; // About 10**(-60 million)
136 
137  // The maximum number of mantissa bits supported. If a value has more
138  // mantissa bits than this, it is replaced with NaN. (It is expected that
139  // users of this class will never want this much precision.)
140  static const int kMaxPrec = 64 << 20; // About 20 million digits
141 
142  // Rounding modes. kRoundTiesToEven and kRoundTiesAwayFromZero both round
143  // to the nearest representable value unless two values are equally close.
144  // In that case kRoundTiesToEven rounds to the nearest even value, while
145  // kRoundTiesAwayFromZero always rounds away from zero.
146  enum RoundingMode {
147  kRoundTiesToEven,
148  kRoundTiesAwayFromZero,
149  kRoundTowardZero,
150  kRoundAwayFromZero,
151  kRoundTowardPositive,
152  kRoundTowardNegative
153  };
154 
156  // Constructors
157 
158  // The default constructor initializes the value to zero. (The initial
159  // value must be zero rather than NaN for compatibility with the built-in
160  // float types.)
161  inline ExactFloat();
162 
163  // Construct an ExactFloat from a "double". The constructor is implicit so
164  // that this class can be used as a replacement for "float" or "double" in
165  // templatized libraries. (With an explicit constructor, code such as
166  // "ExactFloat f = 2.5;" would not compile.) All double-precision values are
167  // supported, including denormalized numbers, infinities, and NaNs.
168  ExactFloat(double v);
169 
170  // Construct an ExactFloat from an "int". Note that in general, ints are
171  // automatically converted to doubles and so would be handled by the
172  // constructor above. However, the particular argument (0) would be
173  // ambiguous; the compiler wouldn't know whether to treat it as a "double" or
174  // "const char*" (since 0 is a valid null pointer constant). Adding an "int"
175  // constructor solves this problem.
176  //
177  // We do not provide constructors for "unsigned", "long", "unsigned long",
178  // "long long", or "unsigned long long", since these types are not typically
179  // used in floating-point calculations and it is safer to require them to be
180  // explicitly cast.
181  ExactFloat(int v);
182 
183  // Construct an ExactFloat from a string (such as "1.2e50"). Requires that
184  // the value is exactly representable as a floating-point number (so for
185  // example, "0.125" is allowed but "0.1" is not).
186  explicit ExactFloat(const char* s) { Unimplemented(); }
187 
188  // Copy constructor.
189  ExactFloat(const ExactFloat& b);
190 
191  // The destructor is not virtual for efficiency reasons. Therefore no
192  // subclass should declare additional fields that require destruction.
193  inline ~ExactFloat() = default;
194 
196  // Constants
197  //
198  // As an alternative to the constants below, you can also just use the
199  // constants defined in <math.h>, for example:
200  //
201  // ExactFloat x = NAN, y = -INFINITY;
202 
203  // Return an ExactFloat equal to positive zero (if sign >= 0) or
204  // negative zero (if sign < 0).
205  static ExactFloat SignedZero(int sign);
206 
207  // Return an ExactFloat equal to positive infinity (if sign >= 0) or
208  // negative infinity (if sign < 0).
209  static ExactFloat Infinity(int sign);
210 
211  // Return an ExactFloat that is NaN (Not-a-Number).
212  static ExactFloat NaN();
213 
215  // Accessor Methods
216 
217  // Return the maximum precision of the ExactFloat. This method exists only
218  // for compatibility with MPFloat.
219  int max_prec() const { return kMaxPrec; }
220 
221  // Return the actual precision of this ExactFloat (the current number of
222  // bits in its mantissa). Returns 0 for non-normal numbers such as NaN.
223  int prec() const;
224 
225  // Return the exponent of this ExactFloat given that the mantissa is in the
226  // range [0.5, 1). It is an error to call this method if the value is zero,
227  // infinity, or NaN.
228  int exp() const;
229 
230  // Set the value of the ExactFloat to +0 (if sign >= 0) or -0 (if sign < 0).
231  void set_zero(int sign);
232 
233  // Set the value of the ExactFloat to positive infinity (if sign >= 0) or
234  // negative infinity (if sign < 0).
235  void set_inf(int sign);
236 
237  // Set the value of the ExactFloat to NaN (Not-a-Number).
238  void set_nan();
239 
240  // Unfortunately, isinf(x), isnan(x), isnormal(x), and isfinite(x) are
241  // defined as macros in <math.h>. Therefore we can't easily extend them
242  // here. Instead we provide methods with underscores in their names that do
243  // the same thing: x.is_inf(), etc.
244  //
245  // These macros are not implemented: signbit(x), fpclassify(x).
246 
247  // Return true if this value is zero (including negative zero).
248  inline bool is_zero() const;
249 
250  // Return true if this value is infinity (positive or negative).
251  inline bool is_inf() const;
252 
253  // Return true if this value is NaN (Not-a-Number).
254  inline bool is_nan() const;
255 
256  // Return true if this value is a normal floating-point number. Non-normal
257  // values (zero, infinity, and NaN) often need to be handled separately
258  // because they are represented using special exponent values and their
259  // mantissa is not defined.
260  inline bool is_normal() const;
261 
262  // Return true if this value is a normal floating-point number or zero,
263  // i.e. it is not infinity or NaN.
264  inline bool is_finite() const;
265 
266  // Return true if the sign bit is set (this includes negative zero).
267  inline bool sign_bit() const;
268 
269  // Return +1 if this ExactFloat is positive, -1 if it is negative, and 0
270  // if it is zero or NaN. Note that unlike sign_bit(), sgn() returns 0 for
271  // both positive and negative zero.
272  inline int sgn() const;
273 
275  // Conversion Methods
276  //
277  // Note that some conversions are defined as functions further below,
278  // e.g. to convert to an integer you can use lround(), llrint(), etc.
279 
280  // Round to double precision. Note that since doubles have a much smaller
281  // exponent range than ExactFloats, very small values may be rounded to
282  // (positive or negative) zero, and very large values may be rounded to
283  // infinity.
284  //
285  // It is very important to make this a named method rather than an implicit
286  // conversion, because otherwise there would be a silent loss of precision
287  // whenever some desired operator or function happens not to be implemented.
288  // For example, if fabs() were not implemented and "x" and "y" were
289  // ExactFloats, then x = fabs(y) would silently convert "y" to a "double",
290  // take its absolute value, and convert it back to an ExactFloat.
291  double ToDouble() const;
292 
293  // Return a human-readable string such that if two values with the same
294  // precision are different, then their string representations are different.
295  // The format is similar to printf("%g"), except that the number of
296  // significant digits depends on the precision (with a minimum of 10).
297  // Trailing zeros are stripped (just like "%g").
298  //
299  // Note that if two values have different precisions, they may have the same
300  // ToString() value even though their values are slightly different. If you
301  // need to distinguish such values, use ToUniqueString() intead.
302  string ToString() const;
303 
304  // Return a string formatted according to printf("%Ng") where N is the given
305  // maximum number of significant digits.
306  string ToStringWithMaxDigits(int max_digits) const;
307 
308  // Return a human-readable string such that if two ExactFloats have different
309  // values, then their string representations are always different. This
310  // method is useful for debugging. The string has the form "value<prec>",
311  // where "prec" is the actual precision of the ExactFloat (e.g., "0.215<50>").
312  string ToUniqueString() const;
313 
314  // Return an upper bound on the number of significant digits required to
315  // distinguish any two floating-point numbers with the given precision when
316  // they are formatted as decimal strings in exponential format.
317  static int NumSignificantDigitsForPrec(int prec);
318 
319  // Output the ExactFloat in human-readable format, e.g. for logging.
320  friend std::ostream& operator<<(std::ostream& o, ExactFloat const& f) {
321  return o << f.ToString();
322  }
323 
325  // Other Methods
326 
327  // Round the ExactFloat so that its mantissa has at most "max_prec" bits
328  // using the given rounding mode. Requires "max_prec" to be at least 2
329  // (since kRoundTiesToEven doesn't make sense with fewer bits than this).
330  ExactFloat RoundToMaxPrec(int max_prec, RoundingMode mode) const;
331 
333  // Operators
334 
335  // Assignment operator.
336  ExactFloat& operator=(const ExactFloat& b);
337 
338  // Unary plus.
339  ExactFloat operator+() const { return *this; }
340 
341  // Unary minus.
342  ExactFloat operator-() const;
343 
344  // Addition.
345  friend ExactFloat operator+(const ExactFloat& a, const ExactFloat& b);
346 
347  // Subtraction.
348  friend ExactFloat operator-(const ExactFloat& a, const ExactFloat& b);
349 
350  // Multiplication.
351  friend ExactFloat operator*(const ExactFloat& a, const ExactFloat& b);
352 
353  // Division is not implemented because the result cannot be represented
354  // exactly in general. Doing this properly would require extending all the
355  // operations to support rounding to a specified precision.
356 
357  // Arithmetic assignment operators (+=, -=, *=).
358  ExactFloat& operator+=(const ExactFloat& b) { return (*this = *this + b); }
359  ExactFloat& operator-=(const ExactFloat& b) { return (*this = *this - b); }
360  ExactFloat& operator*=(const ExactFloat& b) { return (*this = *this * b); }
361 
362  // Comparison operators (==, !=, <, <=, >, >=).
363  friend bool operator==(const ExactFloat& a, const ExactFloat& b);
364  friend bool operator<(const ExactFloat& a, const ExactFloat& b);
365  // These don't need to be friends but are declared here for completeness.
366  inline friend bool operator!=(const ExactFloat& a, const ExactFloat& b);
367  inline friend bool operator<=(const ExactFloat& a, const ExactFloat& b);
368  inline friend bool operator>(const ExactFloat& a, const ExactFloat& b);
369  inline friend bool operator>=(const ExactFloat& a, const ExactFloat& b);
370 
372  // Math Intrinsics
373  //
374  // The math intrinsics currently supported by ExactFloat are listed below.
375  // Except as noted, they behave identically to the usual glibc intrinsics
376  // except that they have greater precision. See the man pages for more
377  // information.
378 
380 
381  // Absolute value.
382  friend ExactFloat fabs(const ExactFloat& a);
383  friend ExactFloat abs(const ExactFloat& a);
384 
385  // Maximum of two values.
386  friend ExactFloat fmax(const ExactFloat& a, const ExactFloat& b);
387 
388  // Minimum of two values.
389  friend ExactFloat fmin(const ExactFloat& a, const ExactFloat& b);
390 
391  // Positive difference: max(a - b, 0).
392  friend ExactFloat fdim(const ExactFloat& a, const ExactFloat& b);
393 
395 
396  // Round up to the nearest integer.
397  friend ExactFloat ceil(const ExactFloat& a);
398 
399  // Round down to the nearest integer.
400  friend ExactFloat floor(const ExactFloat& a);
401 
402  // Round to the nearest integer not larger in absolute value.
403  // For example: f(-1.9) = -1, f(2.9) = 2.
404  friend ExactFloat trunc(const ExactFloat& a);
405 
406  // Round to the nearest integer, rounding halfway cases away from zero.
407  // For example: f(-0.5) = -1, f(0.5) = 1, f(1.5) = 2, f(2.5) = 3.
408  friend ExactFloat round(const ExactFloat& a);
409 
410  // Round to the nearest integer, rounding halfway cases to an even integer.
411  // For example: f(-0.5) = 0, f(0.5) = 0, f(1.5) = 2, f(2.5) = 2.
412  friend ExactFloat rint(const ExactFloat& a);
413 
414  // A synonym for rint().
415  friend ExactFloat nearbyint(const ExactFloat& a) { return rint(a); }
416 
418 
419  // Like rint(), but rounds to the nearest "long" value. Returns the
420  // minimum/maximum possible integer if the value is out of range.
421  friend long lrint(const ExactFloat& a);
422 
423  // Like rint(), but rounds to the nearest "long long" value. Returns the
424  // minimum/maximum possible integer if the value is out of range.
425  friend long long llrint(const ExactFloat& a);
426 
427  // Like round(), but rounds to the nearest "long" value. Returns the
428  // minimum/maximum possible integer if the value is out of range.
429  friend long lround(const ExactFloat& a);
430 
431  // Like round(), but rounds to the nearest "long long" value. Returns the
432  // minimum/maximum possible integer if the value is out of range.
433  friend long long llround(const ExactFloat& a);
434 
436 
437  // The remainder of dividing "a" by "b", where the quotient is rounded toward
438  // zero to the nearest integer. Similar to (a - trunc(a / b) * b).
439  friend ExactFloat fmod(const ExactFloat& a, const ExactFloat& b) {
440  // Note that it is possible to implement this operation exactly, it just
441  // hasn't been done.
442  return Unimplemented();
443  }
444 
445  // The remainder of dividing "a" by "b", where the quotient is rounded to the
446  // nearest integer, rounding halfway cases to an even integer. Similar to
447  // (a - rint(a / b) * b).
448  friend ExactFloat remainder(const ExactFloat& a, const ExactFloat& b) {
449  // Note that it is possible to implement this operation exactly, it just
450  // hasn't been done.
451  return Unimplemented();
452  }
453 
454  // A synonym for remainder().
455  friend ExactFloat drem(const ExactFloat& a, const ExactFloat& b) {
456  return remainder(a, b);
457  }
458 
459  // Break the argument "a" into integer and fractional parts, each of which
460  // has the same sign as "a". The fractional part is returned, and the
461  // integer part is stored in the output parameter "i_ptr". Both output
462  // values are set to have the same maximum precision as "a".
463  friend ExactFloat modf(const ExactFloat& a, ExactFloat* i_ptr) {
464  // Note that it is possible to implement this operation exactly, it just
465  // hasn't been done.
466  return Unimplemented();
467  }
468 
470 
471  // Return an ExactFloat with the magnitude of "a" and the sign bit of "b".
472  // (Note that an IEEE zero can be either positive or negative.)
473  friend ExactFloat copysign(const ExactFloat& a, const ExactFloat& b);
474 
475  // Convert "a" to a normalized fraction in the range [0.5, 1) times a power
476  // of two. Return the fraction and set "exp" to the exponent. If "a" is
477  // zero, infinity, or NaN then return "a" and set "exp" to zero.
478  friend ExactFloat frexp(const ExactFloat& a, int* exp);
479 
480  // Return "a" multiplied by 2 raised to the power "exp".
481  friend ExactFloat ldexp(const ExactFloat& a, int exp);
482 
483  // A synonym for ldexp().
484  friend ExactFloat scalbn(const ExactFloat& a, int exp) {
485  return ldexp(a, exp);
486  }
487 
488  // A version of ldexp() where "exp" is a long integer.
489  friend ExactFloat scalbln(const ExactFloat& a, long exp);
490 
491  // Convert "a" to a normalized fraction in the range [1,2) times a power of
492  // two, and return the exponent value as an integer. This is equivalent to
493  // lrint(floor(log2(fabs(a)))) but it is computed more efficiently. Returns
494  // the constants documented in the man page for zero, infinity, or NaN.
495  friend int ilogb(const ExactFloat& a);
496 
497  // Convert "a" to a normalized fraction in the range [1,2) times a power of
498  // two, and return the exponent value as an ExactFloat. This is equivalent to
499  // floor(log2(fabs(a))) but it is computed more efficiently.
500  friend ExactFloat logb(const ExactFloat& a);
501 
502  protected:
503  // OpenSSL >= 1.1 does not have BN_init, and does not support stack-
504  // allocated BIGNUMS. We use BN_init when possible, but BN_new otherwise.
505  // If the performance penalty is too high, an object pool can be added
506  // in the future.
507 #if defined(OPENSSL_IS_BORINGSSL) || OPENSSL_VERSION_NUMBER < 0x10100000L
508  // BoringSSL and OpenSSL < 1.1 support stack allocated BIGNUMs and BN_init.
509  class BigNum {
510  public:
511  BigNum() { BN_init(&bn_); }
512  // Prevent accidental, expensive, copying.
513  BigNum(const BigNum&) = delete;
514  BigNum& operator=(const BigNum&) = delete;
515  ~BigNum() { BN_free(&bn_); }
516  BIGNUM* get() { return &bn_; }
517  const BIGNUM* get() const { return &bn_; }
518  private:
519  BIGNUM bn_;
520  };
521 #else
522  class BigNum {
523  public:
524  BigNum() : bn_(BN_new()) {}
525  BigNum(const BigNum&) = delete;
526  BigNum& operator=(const BigNum&) = delete;
527  ~BigNum() { BN_free(bn_); }
528  BIGNUM* get() { return bn_; }
529  const BIGNUM* get() const { return bn_; }
530  private:
531  BIGNUM* bn_;
532  };
533 #endif
534 
535  // Non-normal numbers are represented using special exponent values and a
536  // mantissa of zero. Do not change these values; methods such as
537  // is_normal() make assumptions about their ordering. Non-normal numbers
538  // can have either a positive or negative sign (including zero and NaN).
539  static const int32 kExpNaN = INT_MAX;
540  static const int32 kExpInfinity = INT_MAX - 1;
541  static const int32 kExpZero = INT_MAX - 2;
542 
543  // Normal numbers are represented as (sign_ * bn_ * (2 ** bn_exp_)), where:
544  // - sign_ is either +1 or -1
545  // - bn_ is a BIGNUM with a positive value
546  // - bn_exp_ is the base-2 exponent applied to bn_.
547  int32 sign_;
548  int32 bn_exp_;
549  BigNum bn_;
550 
551  // A standard IEEE "double" has a 53-bit mantissa consisting of a 52-bit
552  // fraction plus an implicit leading "1" bit.
553  static const int kDoubleMantissaBits = 53;
554 
555  // Convert an ExactFloat with no more than 53 bits in its mantissa to a
556  // "double". This method handles non-normal values (NaN, etc).
557  double ToDoubleHelper() const;
558 
559  // Round an ExactFloat so that it is a multiple of (2 ** bit_exp), using the
560  // given rounding mode.
561  ExactFloat RoundToPowerOf2(int bit_exp, RoundingMode mode) const;
562 
563  // Convert the ExactFloat to a decimal value of the form 0.ddd * (10 ** x),
564  // with at most "max_digits" significant digits (trailing zeros are removed).
565  // Set (*digits) to the ASCII digits and return the decimal exponent "x".
566  int GetDecimalDigits(int max_digits, string* digits) const;
567 
568  // Return a_sign * fabs(a) + b_sign * fabs(b). Used to implement addition
569  // and subtraction.
570  static ExactFloat SignedSum(int a_sign, const ExactFloat* a,
571  int b_sign, const ExactFloat* b);
572 
573  // Convert an ExactFloat to its canonical form. Underflow results in signed
574  // zero, overflow results in signed infinity, and precision overflow results
575  // in NaN. A zero mantissa is converted to the canonical zero value with
576  // the given sign; otherwise the mantissa is normalized so that its low bit
577  // is 1. Non-normal numbers are left unchanged.
578  void Canonicalize();
579 
580  // Scale the mantissa of this ExactFloat so that it has the same bn_exp_ as
581  // "b", then return -1, 0, or 1 according to whether the scaled mantissa is
582  // less, equal, or greater than the mantissa of "b". Requires that both
583  // values are normal.
584  int ScaleAndCompare(const ExactFloat& b) const;
585 
586  // Return true if the magnitude of this ExactFloat is less than the
587  // magnitude of "b". Requires that neither value is NaN.
588  bool UnsignedLess(const ExactFloat& b) const;
589 
590  // Return an ExactFloat with the magnitude of this ExactFloat and the given
591  // sign. (Similar to copysign, except that the sign is given explicitly
592  // rather than being copied from another ExactFloat.)
593  inline ExactFloat CopyWithSign(int sign) const;
594 
595  // Convert an ExactFloat to an integer of type "T" using the given rounding
596  // mode. The type "T" must be signed. Returns the largest possible integer
597  // for NaN, and clamps out of range values to the largest or smallest
598  // possible values.
599  template <class T> T ToInteger(RoundingMode mode) const;
600 
601  // Log a fatal error message (used for unimplemented methods).
602  static ExactFloat Unimplemented();
603 };
604 
606 // Implementation details follow:
607 
608 inline ExactFloat::ExactFloat() : sign_(1), bn_exp_(kExpZero) {
609 }
610 
611 inline bool ExactFloat::is_zero() const { return bn_exp_ == kExpZero; }
612 inline bool ExactFloat::is_inf() const { return bn_exp_ == kExpInfinity; }
613 inline bool ExactFloat::is_nan() const { return bn_exp_ == kExpNaN; }
614 inline bool ExactFloat::is_normal() const { return bn_exp_ < kExpZero; }
615 inline bool ExactFloat::is_finite() const { return bn_exp_ <= kExpZero; }
616 inline bool ExactFloat::sign_bit() const { return sign_ < 0; }
617 
618 inline int ExactFloat::sgn() const {
619  return (is_nan() || is_zero()) ? 0 : sign_;
620 }
621 
622 inline bool operator!=(const ExactFloat& a, const ExactFloat& b) {
623  return !(a == b);
624 }
625 
626 inline bool operator<=(const ExactFloat& a, const ExactFloat& b) {
627  // NaN is unordered compared to everything, including itself.
628  if (a.is_nan() || b.is_nan()) return false;
629  return !(b < a);
630 }
631 
632 inline bool operator>(const ExactFloat& a, const ExactFloat& b) {
633  return b < a;
634 }
635 
636 inline bool operator>=(const ExactFloat& a, const ExactFloat& b) {
637  return b <= a;
638 }
639 
640 inline ExactFloat ExactFloat::CopyWithSign(int sign) const {
641  ExactFloat r = *this;
642  r.sign_ = sign;
643  return r;
644 }
645 
646 #endif // S2_UTIL_MATH_EXACTFLOAT_EXACTFLOAT_H_