mathutil.h
1 // Copyright 2001 and onwards Google Inc.
2 //
3 // This class is intended to contain a collection of useful (static)
4 // mathematical functions, properly coded (by consulting numerical
5 // recipes or another authoritative source first).
6 
7 #ifndef UTIL_MATH_MATHUTIL_H__
8 #define UTIL_MATH_MATHUTIL_H__
9 
10 #include <math.h>
11 #include <algorithm>
12 #include <vector>
13 
14 #include "base/casts.h"
15 #include "base/integral_types.h"
16 #include "base/logging.h"
17 #include "base/macros.h"
18 #include "util/math/mathlimits.h"
19 
20 // ========================================================================= //
21 
22 class MathUtil {
23  public:
24  // --------------------------------------------------------------------
25  // FastIntRound, FastInt64Round
26  // Fast routines for converting floating-point numbers to integers.
27  //
28  // These routines are approximately 6 times faster than the default
29  // implementation of IntRound() on Intel processors (12 times faster on
30  // the Pentium 3). They are also more than 5 times faster than simply
31  // casting a "double" to an "int" using static_cast<int>. This is
32  // because casts are defined to truncate towards zero, which on Intel
33  // processors requires changing the rounding mode and flushing the
34  // floating-point pipeline (unless programs are compiled specifically
35  // for the Pentium 4, which has a new instruction to avoid this).
36  //
37  // Numbers that are halfway between two integers may be rounded up or
38  // down. This is because the conversion is done using the default
39  // rounding mode, which rounds towards the closest even number in case
40  // of ties. So for example, FastIntRound(0.5) == 0, but
41  // FastIntRound(1.5) == 2. These functions should only be used with
42  // applications that don't care about which way such half-integers are
43  // rounded.
44  //
45  // There are template specializations of Round() which call these
46  // functions (for "int" and "int64" only), but it's safer to call them
47  // directly.
48  //
49  // This functions are equivalent to lrint() and llrint() as defined in
50  // the ISO C99 standard. Unfortunately this standard does not seem to
51  // widely adopted yet and these functions are not available by default.
52  // --------------------------------------------------------------------
53 
54  static int32 FastIntRound(double x) {
55  // This function is not templatized because gcc doesn't seem to be able
56  // to deal with inline assembly code in templatized functions, and there
57  // is no advantage to passing an argument type of "float" on Intel
58  // architectures anyway.
59 
60 #if defined __GNUC__ && (defined __i386__ || defined __SSE2__)
61 #if defined __SSE2__
62  // SSE2.
63  int32 result;
64  __asm__ __volatile__
65  ("cvtsd2si %1, %0"
66  : "=r" (result) // Output operand is a register
67  : "x" (x)); // Input operand is an xmm register
68  return result;
69 #elif defined __i386__
70  // FPU stack. Adapted from /usr/include/bits/mathinline.h.
71  int32 result;
72  __asm__ __volatile__
73  ("fistpl %0"
74  : "=m" (result) // Output operand is a memory location
75  : "t" (x) // Input operand is top of FP stack
76  : "st"); // Clobbers (pops) top of FP stack
77  return result;
78 #endif // if defined __x86_64__ || ...
79 #else
80  return Round<int32, double>(x);
81 #endif // if defined __GNUC__ && ...
82  }
83 
84  static int64 FastInt64Round(double x) {
85 #if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
86 #if defined __x86_64__
87  // SSE2.
88  int64 result;
89  __asm__ __volatile__
90  ("cvtsd2si %1, %0"
91  : "=r" (result) // Output operand is a register
92  : "x" (x)); // Input operand is an xmm register
93  return result;
94 #elif defined __i386__
95  // There is no CVTSD2SI in i386 to produce a 64 bit int, even with SSE2.
96  // FPU stack. Adapted from /usr/include/bits/mathinline.h.
97  int64 result;
98  __asm__ __volatile__
99  ("fistpll %0"
100  : "=m" (result) // Output operand is a memory location
101  : "t" (x) // Input operand is top of FP stack
102  : "st"); // Clobbers (pops) top of FP stack
103  return result;
104 #endif // if defined __i386__
105 #else
106  return Round<int64, double>(x);
107 #endif // if defined __GNUC__ && ...
108  }
109 
110  // Return Not a Number.
111  // Consider instead using MathLimits<double>::kNaN directly.
112  static double NaN() { return MathLimits<double>::kNaN; }
113 
114  // Returns an approximation An for the n-th element of the harmonic
115  // serices Hn = 1 + ... + 1/n. Sets error e such that |An-Hn| < e.
116  static double Harmonic(int64 n, double *e);
117 
118  // Returns Stirling's Approximation for log(n!) which has an error
119  // of at worst 1/(1260*n^5).
120  static double Stirling(double n);
121 
122  // Returns the log of the binomial coefficient C(n, k), known in the
123  // vernacular as "N choose K". Why log? Because the integer number
124  // for non-trivial N and K would overflow.
125  // Note that if k > 15, this uses Stirling's approximation of log(n!).
126  // The relative error is about 1/(1260*k^5) (which is 7.6e-10 when k=16).
127  static double LogCombinations(int n, int k);
128 
129  // Rounds "f" to the nearest float which has its last "bits" bits of
130  // the mantissa set to zero. This rounding will introduce a
131  // fractional error of at most 2^(bits - 24). Useful for values
132  // stored in compressed files, when super-accurate numbers aren't
133  // needed and the random-looking low-order bits foil compressors.
134  // This routine should be really fast when inlined with "bits" set
135  // to a constant.
136  // Precondition: 1 <= bits <= 23, f != NaN
137  static float RoundOffBits(const float f, const int bits) {
138  const int32 f_rep = bit_cast<int32>(f);
139 
140  // Set low-order "bits" bits to zero.
141  int32 g_rep = f_rep & ~((1 << bits) - 1);
142 
143  // Round mantissa up if we need to. Note that we do round-to-even,
144  // a.k.a. round-up-if-odd.
145  const int32 lowbits = f_rep & ((1 << bits) - 1);
146  if (lowbits > (1 << (bits - 1)) ||
147  (lowbits == (1 << (bits - 1)) && (f_rep & (1 << bits)))) {
148  g_rep += (1 << bits);
149  // NOTE: overflow does a really nice thing here - if all the
150  // rest of the mantissa bits are 1, the carry carries over into
151  // the exponent and increments it by 1, which is exactly what we
152  // want. It even gets to +/-INF properly.
153  }
154  return bit_cast<float>(g_rep);
155  }
156  // Same, but for doubles. 1 <= bits <= 52, error at most 2^(bits - 53).
157  static double RoundOffBits(const double f, const int bits) {
158  const int64 f_rep = bit_cast<int64>(f);
159  int64 g_rep = f_rep & ~((1LL << bits) - 1);
160  const int64 lowbits = f_rep & ((1LL << bits) - 1);
161  if (lowbits > (1LL << (bits - 1)) ||
162  (lowbits == (1LL << (bits - 1)) && (f_rep & (1LL << bits)))) {
163  g_rep += (1LL << bits);
164  }
165  return bit_cast<double>(g_rep);
166  }
167 
168  // Largest of two values.
169  // Works correctly for special floating point values.
170  // Note: 0.0 and -0.0 are not differentiated by Max (Max(0.0, -0.0) is -0.0),
171  // which should be OK because, although they (can) have different
172  // bit representation, they are observably the same when examined
173  // with arithmetic and (in)equality operators.
174  template<typename T>
175  static T Max(const T x, const T y) {
176  return MathLimits<T>::IsNaN(x) || x > y ? x : y;
177  }
178 
179  // Smallest of two values
180  // Works correctly for special floating point values.
181  // Note: 0.0 and -0.0 are not differentiated by Min (Min(-0.0, 0.0) is 0.0),
182  // which should be OK: see the comment for Max above.
183  template<typename T>
184  static T Min(const T x, const T y) {
185  return MathLimits<T>::IsNaN(x) || x < y ? x : y;
186  }
187 
188  // Absolute value of x
189  // Works correctly for unsigned types and
190  // for special floating point values.
191  // Note: 0.0 and -0.0 are not differentiated by Abs (Abs(0.0) is -0.0),
192  // which should be OK: see the comment for Max above.
193  template<typename T>
194  static T Abs(const T x) {
195  return x > 0 ? x : -x;
196  }
197 
198  // Absolute value of the difference between two numbers.
199  // Works correctly for signed types and special floating point values.
200  template<typename T>
201  static typename MathLimits<T>::UnsignedType AbsDiff(const T x, const T y) {
202  // Carries out arithmetic as unsigned to avoid overflow.
203  typedef typename MathLimits<T>::UnsignedType R;
204  return x > y ? R(x) - R(y) : R(y) - R(x);
205  }
206 
207  // The sign of x
208  // (works for unsigned types and special floating point values as well):
209  // -1 if x < 0,
210  // +1 if x > 0,
211  // 0 if x = 0.
212  // nan if x is nan.
213  template<typename T>
214  static T Sign(const T x) {
215  return MathLimits<T>::IsNaN(x) ? x : (x == 0 ? 0 : (x > 0 ? 1 : -1));
216  }
217 
218  // CAVEAT: Floating point computation instability for x86 CPUs
219  // can frequently stem from the difference of when floating point values
220  // are transferred from registers to memory and back,
221  // which can depend simply on the level of optimization.
222  // The reason is that the registers use a higher-precision representation.
223  // Hence, instead of relying on approximate floating point equality below
224  // it might be better to reorganize the code with volatile modifiers
225  // for the floating point variables so as to control when
226  // the loss of precision occurs.
227 
228  // If two (usually floating point) numbers are within a certain
229  // absolute margin of error.
230  // NOTE: this "misbehaves" is one is trying to capture provisons for errors
231  // that are relative, i.e. larger if the numbers involved are larger.
232  // Consider using WithinFraction or WithinFractionOrMargin below.
233  //
234  // This and other Within* NearBy* functions below
235  // work correctly for signed types and special floating point values.
236  template<typename T>
237  static bool WithinMargin(const T x, const T y, const T margin) {
238  DCHECK_GE(margin, 0);
239  // this is a little faster than x <= y + margin && x >= y - margin
240  return AbsDiff(x, y) <= margin;
241  }
242 
243  // If two (usually floating point) numbers are within a certain
244  // fraction of their magnitude.
245  // CAVEAT: Although this works well if computation errors are relative
246  // both for large magnitude numbers > 1 and for small magnitude numbers < 1,
247  // zero is never within a fraction of any
248  // non-zero finite number (fraction is required to be < 1).
249  template<typename T>
250  static bool WithinFraction(const T x, const T y, const T fraction);
251 
252  // If two (usually floating point) numbers are within a certain
253  // fraction of their magnitude or within a certain absolute margin of error.
254  // This is the same as the following but faster:
255  // WithinFraction(x, y, fraction) || WithinMargin(x, y, margin)
256  // E.g. WithinFraction(0.0, 1e-10, 1e-5) is false but
257  // WithinFractionOrMargin(0.0, 1e-10, 1e-5, 1e-5) is true.
258  template<typename T>
259  static bool WithinFractionOrMargin(const T x, const T y,
260  const T fraction, const T margin);
261 
262  // NearBy* functions below are geared as replacements for CHECK_EQ()
263  // over floating-point numbers.
264 
265  // Same as WithinMargin(x, y, MathLimits<T>::kStdError)
266  // Works as == for integer types.
267  template<typename T>
268  static bool NearByMargin(const T x, const T y) {
269  return AbsDiff(x, y) <= MathLimits<T>::kStdError;
270  }
271 
272  // Same as WithinFraction(x, y, MathLimits<T>::kStdError)
273  // Works as == for integer types.
274  template<typename T>
275  static bool NearByFraction(const T x, const T y) {
276  return WithinFraction(x, y, MathLimits<T>::kStdError);
277  }
278 
279  // Same as WithinFractionOrMargin(x, y, MathLimits<T>::kStdError,
280  // MathLimits<T>::kStdError)
281  // Works as == for integer types.
282  template<typename T>
283  static bool NearByFractionOrMargin(const T x, const T y) {
284  return WithinFractionOrMargin(x, y, MathLimits<T>::kStdError,
286  }
287 
288  // Tests whether two values are close enough to each other to be considered
289  // equal. This function is intended to be used mainly as a replacement for
290  // equality tests of floating point values in CHECK()s, and as a replacement
291  // for equality comparison against golden files. It is the same as == for
292  // integer types. The purpose of AlmostEquals() is to avoid false positive
293  // error reports in unit tests and regression tests due to minute differences
294  // in floating point arithmetic (for example, due to a different compiler).
295  //
296  // We cannot use simple equality to compare floating point values
297  // because floating point expressions often accumulate inaccuracies, and
298  // new compilers may introduce further variations in the values.
299  //
300  // Two values x and y are considered "almost equals" if:
301  // (a) Both values are very close to zero: x and y are in the range
302  // [-standard_error, standard_error]
303  // Normal calculations producing these values are likely to be dealing
304  // with meaningless residual values.
305  // -or-
306  // (b) The difference between the values is small:
307  // abs(x - y) <= standard_error
308  // -or-
309  // (c) The values are finite and the relative difference between them is
310  // small:
311  // abs (x - y) <= standard_error * max(abs(x), abs(y))
312  // -or-
313  // (d) The values are both positive infinity or both negative infinity.
314  //
315  // Cases (b) and (c) are the same as MathUtils::NearByFractionOrMargin(x, y),
316  // for finite values of x and y.
317  //
318  // standard_error is the corresponding MathLimits<T>::kStdError constant.
319  // It is equivalent to 5 bits of mantissa error. See
320  // google3/util/math/mathlimits.cc.
321  //
322  // Caveat:
323  // AlmostEquals() is not appropriate for checking long sequences of
324  // operations where errors may cascade (like extended sums or matrix
325  // computations), or where significant cancellation may occur
326  // (e.g., the expression (x+y)-x, where x is much larger than y).
327  // Both cases may produce errors in excess of standard_error.
328  // In any case, you should not test the results of calculations which have
329  // not been vetted for possible cancellation errors and the like.
330  template<typename T>
331  static bool AlmostEquals(const T x, const T y) {
332  if (x == y) // Covers +inf and -inf, and is a shortcut for finite values.
333  return true;
335  return false;
336 
337  if (MathUtil::Abs<T>(x) <= MathLimits<T>::kStdError &&
338  MathUtil::Abs<T>(y) <= MathLimits<T>::kStdError)
339  return true;
340 
341  return MathUtil::NearByFractionOrMargin<T>(x, y);
342  }
343 
344  // Returns the clamped value to be between low and high inclusively.
345  template<typename T>
346  static const T& Clamp(const T& low, const T& high, const T& value) {
347  return std::max(low, std::min(value, high));
348  }
349 
350  // Clamps value to be between min and max inclusively.
351  template<typename T>
352  static void ClampValue(const T& low, const T& high, T* value) {
353  *value = Clamp(low, high, *value);
354  }
355 };
356 
357 template<typename T>
358 bool MathUtil::WithinFraction(const T x, const T y, const T fraction) {
359  // not just "0 <= fraction" to fool the compiler for unsigned types
360  DCHECK((0 < fraction || 0 == fraction) && fraction < 1);
361 
362  // Template specialization will convert the if() condition to a constant,
363  // which will cause the compiler to generate code for either the "if" part
364  // or the "then" part. In this way we avoid a compiler warning
365  // about a potential integer overflow in crosstool v12 (gcc 4.3.1).
367  return x == y;
368  } else {
369  // IsFinite checks are to make kPosInf and kNegInf not within fraction
371  (AbsDiff(x, y) <= fraction * Max(Abs(x), Abs(y)));
372  }
373 }
374 
375 template<typename T>
376 bool MathUtil::WithinFractionOrMargin(const T x, const T y,
377  const T fraction, const T margin) {
378  // not just "0 <= fraction" to fool the compiler for unsigned types
379  DCHECK((0 < fraction || 0 == fraction) && fraction < 1 && margin >= 0);
380 
381  // Template specialization will convert the if() condition to a constant,
382  // which will cause the compiler to generate code for either the "if" part
383  // or the "then" part. In this way we avoid a compiler warning
384  // about a potential integer overflow in crosstool v12 (gcc 4.3.1).
386  return x == y;
387  } else {
388  // IsFinite checks are to make kPosInf and kNegInf not within fraction
390  (AbsDiff(x, y) <= Max(margin, fraction * Max(Abs(x), Abs(y))));
391  }
392 }
393 
394 #endif // UTIL_MATH_MATHUTIL_H__