float2decimal.cc
1 // Copyright 2017, Beeri 15. All rights reserved.
2 // Author: Roman Gershman (romange@gmail.com)
3 //
4 #include "util/math/float2decimal.h"
5 #include "base/logging.h"
6 
7 #define FAST_DTOA_UNREACHABLE() __builtin_unreachable();
8 
9 namespace util {
10 
11 namespace dtoa {
12 namespace {
13 
14 //
15 // Given a (normalized) floating-point number v and its neighbors m- and m+
16 //
17 // ---+---------------------------+---------------------------+---
18 // m- v m+
19 //
20 // Grisu first scales the input number w, and its boundaries w- and w+, by an
21 // approximate power-of-ten c ~= 10^-k (which needs to be precomputed using
22 // high-precision arithmetic and stored in a table) such that the exponent of
23 // the products lies within a certain range [alpha, gamma]. It then remains to
24 // produce the decimal digits of a DiyFp number M = f * 2^e, where
25 // alpha <= e <= gamma.
26 //
27 // The choice of alpha and gamma determines the digit generation procedure and
28 // the size of the look-up table (or vice versa...) and depends on the
29 // extended precision q.
30 //
31 // In other words, given normalized w, Grisu needs to find a (normalized) cached
32 // power-of-ten c, such that the exponent of the product c * w = f * 2^e
33 // satisfies (Definition 3.2)
34 //
35 // alpha <= e = e_c + e_w + q <= gamma
36 //
37 // or
38 //
39 // f_c * f_w * 2^alpha <= f_c 2^(e_c) * f_w 2^(e_w) * 2^q
40 // <= f_c * f_w * 2^gamma
41 //
42 // Since c and w are normalized, i.e. 2^(q-1) <= f < 2^q, this implies
43 //
44 // 2^(q-1) * 2^(q-1) * 2^alpha <= c * w * 2^q < 2^q * 2^q * 2^gamma
45 //
46 // or
47 //
48 // 2^(q - 2 + alpha) <= c * w < 2^(q + gamma)
49 //
50 // The distance (gamma - alpha) should be as large as possible in order to make
51 // the table as small as possible, but the digit generation procedure should
52 // still be efficient.
53 //
54 // Assume q = 64 and e < 0. The idea is to cut the number c * w = f * 2^e into
55 // two parts, which can be processed independently: An integral part p1, and a
56 // fractional part p2:
57 //
58 // f * 2^e = ( (f div 2^-e) * 2^-e + (f mod 2^-e) ) * 2^e
59 // = (f div 2^-e) + (f mod 2^-e) * 2^e
60 // = p1 + p2 * 2^e
61 //
62 // The conversion of p1 into decimal form requires some divisions and modulos by
63 // a power-of-ten. These operations are faster for 32-bit than for 64-bit
64 // integers, so p1 should ideally fit into a 32-bit integer. This be achieved by
65 // choosing
66 //
67 // -e >= 32 or e <= -32 := gamma
68 //
69 // In order to convert the fractional part
70 //
71 // p2 * 2^e = d[-1] / 10^1 + d[-2] / 10^2 + ... + d[-k] / 10^k
72 //
73 // into decimal form, the fraction is repeatedly multiplied by 10 and the digits
74 // d[-i] are extracted in order:
75 //
76 // (10 * p2) div 2^-e = d[-1]
77 // (10 * p2) mod 2^-e = d[-2] / 10^1 + ... + d[-k] / 10^(k-1)
78 //
79 // The multiplication by 10 must not overflow. For this it is sufficient to have
80 //
81 // 10 * p2 < 16 * p2 = 2^4 * p2 <= 2^64.
82 //
83 // Since p2 = f mod 2^-e < 2^-e one may choose
84 //
85 // -e <= 60 or e >= -60 := alpha
86 //
87 // Different considerations may lead to different digit generation procedures
88 // and different values of alpha and gamma...
89 //
90 constexpr int const kAlpha = -60;
91 constexpr int const kGamma = -32;
92 
93 
94 // Grisu needs to find a(normalized) cached power-of-ten c, such that the
95 // exponent of the product c * w = f * 2^e satisfies (Definition 3.2)
96 //
97 // alpha <= e = e_c + e_w + q <= gamma
98 //
99 // For IEEE double precision floating-point numbers v converted into a DiyFp's
100 // w = f * 2^e,
101 //
102 // e >= -1022 (min IEEE exponent)
103 // -52 (IEEE significand size)
104 // -52 (possibly normalize denormal IEEE numbers)
105 // -11 (normalize the DiyFp)
106 // = -1137
107 //
108 // and
109 //
110 // e <= +1023 (max IEEE exponent)
111 // -52 (IEEE significand size)
112 // -11 (normalize the DiyFp)
113 // = 960
114 //
115 // (For IEEE single precision the exponent range is [-196, 80].)
116 //
117 // Now
118 //
119 // alpha <= e_c + e + q <= gamma
120 // ==> f_c * 2^alpha <= c * 2^e * 2^q
121 //
122 // and since the c's are normalized, 2^(q-1) <= f_c,
123 //
124 // ==> 2^(q - 1 + alpha) <= c * 2^(e + q)
125 // ==> 2^(alpha - e - 1) <= c
126 //
127 // If c were an exakt power of ten, i.e. c = 10^k, one may determine k as
128 //
129 // k = ceil( log_10( 2^(alpha - e - 1) ) )
130 // = ceil( (alpha - e - 1) * log_10(2) )
131 //
132 // (From the paper)
133 // "In theory the result of the procedure could be wrong since c is rounded, and
134 // the computation itself is approximated [...]. In practice, however, this
135 // simple function is sufficient."
136 //
137 // The difference of the decimal exponents of adjacent table entries must be
138 // <= floor( (gamma - alpha) * log_10(2) ) = 8.
139 
140 struct CachedPower { // c = f * 2^e ~= 10^k
141  uint64_t f;
142  int e;
143  int k;
144 };
145 
146 // Returns a cached power-of-ten c, such that alpha <= e_c + e + q <= gamma.
147 CachedPower GetCachedPowerForBinaryExponent(int e) {
148  // NB:
149  // Actually this function returns c, such that -60 <= e_c + e + 64 <= -34.
150 
151  static constexpr CachedPower const kCachedPowers[] = {
152  {0xAB70FE17C79AC6CA, -1060, -300}, // -1060 + 960 + 64 = -36
153  {0xFF77B1FCBEBCDC4F, -1034, -292}, {0xBE5691EF416BD60C, -1007, -284},
154  {0x8DD01FAD907FFC3C, -980, -276}, {0xD3515C2831559A83, -954, -268},
155  {0x9D71AC8FADA6C9B5, -927, -260}, {0xEA9C227723EE8BCB, -901, -252},
156  {0xAECC49914078536D, -874, -244}, {0x823C12795DB6CE57, -847, -236},
157  {0xC21094364DFB5637, -821, -228}, {0x9096EA6F3848984F, -794, -220},
158  {0xD77485CB25823AC7, -768, -212}, {0xA086CFCD97BF97F4, -741, -204},
159  {0xEF340A98172AACE5, -715, -196}, {0xB23867FB2A35B28E, -688, -188},
160  {0x84C8D4DFD2C63F3B, -661, -180}, {0xC5DD44271AD3CDBA, -635, -172},
161  {0x936B9FCEBB25C996, -608, -164}, {0xDBAC6C247D62A584, -582, -156},
162  {0xA3AB66580D5FDAF6, -555, -148}, {0xF3E2F893DEC3F126, -529, -140},
163  {0xB5B5ADA8AAFF80B8, -502, -132}, {0x87625F056C7C4A8B, -475, -124},
164  {0xC9BCFF6034C13053, -449, -116}, {0x964E858C91BA2655, -422, -108},
165  {0xDFF9772470297EBD, -396, -100}, {0xA6DFBD9FB8E5B88F, -369, -92},
166  {0xF8A95FCF88747D94, -343, -84}, {0xB94470938FA89BCF, -316, -76},
167  {0x8A08F0F8BF0F156B, -289, -68}, {0xCDB02555653131B6, -263, -60},
168  {0x993FE2C6D07B7FAC, -236, -52}, {0xE45C10C42A2B3B06, -210, -44},
169  {0xAA242499697392D3, -183, -36}, // -183 + 80 + 64 = -39
170  {0xFD87B5F28300CA0E, -157, -28}, //
171  {0xBCE5086492111AEB, -130, -20}, //
172  {0x8CBCCC096F5088CC, -103, -12}, //
173  {0xD1B71758E219652C, -77, -4}, //
174  {0x9C40000000000000, -50, 4}, //
175  {0xE8D4A51000000000, -24, 12}, //
176  {0xAD78EBC5AC620000, 3, 20}, //
177  {0x813F3978F8940984, 30, 28}, //
178  {0xC097CE7BC90715B3, 56, 36}, //
179  {0x8F7E32CE7BEA5C70, 83, 44}, // 83 - 196 + 64 = -49
180  {0xD5D238A4ABE98068, 109, 52}, {0x9F4F2726179A2245, 136, 60},
181  {0xED63A231D4C4FB27, 162, 68}, {0xB0DE65388CC8ADA8, 189, 76},
182  {0x83C7088E1AAB65DB, 216, 84}, {0xC45D1DF942711D9A, 242, 92},
183  {0x924D692CA61BE758, 269, 100}, {0xDA01EE641A708DEA, 295, 108},
184  {0xA26DA3999AEF774A, 322, 116}, {0xF209787BB47D6B85, 348, 124},
185  {0xB454E4A179DD1877, 375, 132}, {0x865B86925B9BC5C2, 402, 140},
186  {0xC83553C5C8965D3D, 428, 148}, {0x952AB45CFA97A0B3, 455, 156},
187  {0xDE469FBD99A05FE3, 481, 164}, {0xA59BC234DB398C25, 508, 172},
188  {0xF6C69A72A3989F5C, 534, 180}, {0xB7DCBF5354E9BECE, 561, 188},
189  {0x88FCF317F22241E2, 588, 196}, {0xCC20CE9BD35C78A5, 614, 204},
190  {0x98165AF37B2153DF, 641, 212}, {0xE2A0B5DC971F303A, 667, 220},
191  {0xA8D9D1535CE3B396, 694, 228}, {0xFB9B7CD9A4A7443C, 720, 236},
192  {0xBB764C4CA7A44410, 747, 244}, {0x8BAB8EEFB6409C1A, 774, 252},
193  {0xD01FEF10A657842C, 800, 260}, {0x9B10A4E5E9913129, 827, 268},
194  {0xE7109BFBA19C0C9D, 853, 276}, {0xAC2820D9623BF429, 880, 284},
195  {0x80444B5E7AA7CF85, 907, 292}, {0xBF21E44003ACDD2D, 933, 300},
196  {0x8E679C2F5E44FF8F, 960, 308}, {0xD433179D9C8CB841, 986, 316},
197  {0x9E19DB92B4E31BA9, 1013, 324}, // 1013 - 1137 + 64 = -60
198  };
199 
200  constexpr int const kCachedPowersSize = 79;
201  constexpr int const kCachedPowersMinDecExp = -300;
202 
203  // This computation gives exactly the same results for k as
204  //
205  // k = ceil((kAlpha - e - 1) * 0.30102999566398114)
206  //
207  // for |e| <= 1500, but doesn't require floating-point operations.
208  // NB: log_10(2) ~= 78913 / 2^18
209  assert(e >= -1500);
210  assert(e <= 1500);
211  int const f = kAlpha - e - 1;
212  int const k = (f * 78913) / (1 << 18) + (f > 0);
213 
214  int const index = (-kCachedPowersMinDecExp + k + (8 - 1)) / 8;
215  assert(index >= 0);
216  assert(index < kCachedPowersSize);
217  static_cast<void>(kCachedPowersSize); // Fix warning.
218 
219  CachedPower const cached = kCachedPowers[index];
220  assert(kAlpha <= cached.e + e + 64);
221  assert(kGamma >= cached.e + e + 64);
222 
223  // XXX:
224  // cached.k = kCachedPowersMinDecExp + 8*index
225 
226  return cached;
227 }
228 
229 // For n != 0, returns k, such that 10^(k-1) <= n < 10^k.
230 // For n == 0, returns 1.
231 inline unsigned InitKappa(uint32_t n) {
232  return base::CountDecimalDigit32(n);
233 }
234 
235 inline void Grisu2Round(char& digit, uint64_t dist, uint64_t delta, uint64_t rest,
236  uint64_t ten_k) {
237  // dist, delta, rest and ten_k all are the significands of
238  // floating-point numbers with an exponent e.
239  assert(dist <= delta);
240  assert(rest <= delta);
241  assert(ten_k > 0);
242 
243  //
244  // <--------------------------- delta ---->
245  // <---- dist --------->
246  // --------------[------------------+-------------------]--------------
247  // w- w w+
248  //
249  // 10^k
250  // <------>
251  // <---- rest ---->
252  // --------------[------------------+----+--------------]--------------
253  // w V
254  // = buf * 10^k
255  //
256  // ten_k represents a unit-in-the-last-place in the decimal representation
257  // stored in buf.
258  // Decrement buf by ten_k while this takes buf closer to w.
259  //
260 
261  while (rest < dist && delta - rest >= ten_k &&
262  (rest + ten_k < dist || dist - rest > rest + ten_k - dist)) {
263  DCHECK_NE('0', digit);
264  digit--;
265  rest += ten_k;
266  }
267 }
268 
269 void SetDigits(uint32_t value, unsigned num_digits, char* buffer) {
270  const char DIGITS[] =
271  "0001020304050607080910111213141516171819"
272  "2021222324252627282930313233343536373839"
273  "4041424344454647484950515253545556575859"
274  "6061626364656667686970717273747576777879"
275  "8081828384858687888990919293949596979899";
276 
277  buffer += num_digits;
278 
279  while (value >= 100) {
280  // Integer division is slow so do it for a group of two digits instead
281  // of for every digit. The idea comes from the talk by Alexandrescu
282  // "Three Optimization Tips for C++".
283  unsigned index = static_cast<unsigned>((value % 100) * 2);
284  value /= 100;
285  *--buffer = DIGITS[index + 1];
286  *--buffer = DIGITS[index];
287  }
288  if (value < 10) {
289  *--buffer = static_cast<char>('0' + value);
290  return;
291  }
292  unsigned index = static_cast<unsigned>(value * 2);
293  *--buffer = DIGITS[index + 1];
294  *--buffer = DIGITS[index];
295 }
296 
297 std::pair<unsigned, int> Grisu2DigitGen(char* buffer, int decimal_exponent,
298  const Fp& M_minus, const Fp& w, const Fp& M_plus) {
299  static constexpr char const* const kDigits = "0123456789";
300 
301  static_assert(kAlpha >= -60, "invalid parameter");
302  static_assert(kGamma <= -32, "invalid parameter");
303 
304  assert(M_plus.e >= kAlpha);
305  assert(M_plus.e <= kGamma);
306 
307  uint64_t delta = M_plus.f - M_minus.f; // (significand of (w+ - w-), implicit exponent is e)
308  uint64_t dist = M_plus.f - w.f; // (significand of (w+ - w ), implicit exponent is e)
309 
310  // <--------------------------- delta ---->
311  // <---- dist --------->
312  // --------------[------------------+-------------------]--------------
313  // w- w w+
314  //
315  // Split w+ = f * 2^e into two parts p1 and p2 (note: e < 0):
316  //
317  // w+ = f * 2^e
318  // = ((f div 2^-e) * 2^-e + (f mod 2^-e)) * 2^e
319  // = ((p1 ) * 2^-e + (p2 )) * 2^e
320  // = p1 + p2 * 2^e
321 
322  // p1 = f div 2^-e (Since -e >= 32, p1 fits into a 32-bit int.)
323  const uint64_t e_shift = -M_plus.e;
324  const uint64_t e_mask = (1ULL << e_shift) - 1;
325 
326  uint32_t p1 = M_plus.f >> e_shift;
327  uint64_t p2 = M_plus.f & e_mask; // p2 = f mod 2^-e
328 
329  DCHECK_GT(p1, 0);
330 
331 //
332 // 1.
333 // Generate the digits of the integral part p1 = d[n-1]...d[1]d[0]
334 //
335 
336  unsigned kappa = InitKappa(p1);
337 
338  // We now have
339  // (B = buffer, L = length = k - n)
340  //
341  // 10^(k-1) <= p1 < 10^k
342  //
343  // p1 = (p1 div 10^(k-1)) * 10^(k-1) + (p1 mod 10^(k-1))
344  // = (B[0] ) * 10^(k-1) + (p1 mod 10^(k-1))
345  //
346  // w+ = p1 + p2 * 2^e
347  // = B[0] * 10^(k-1) + (p1 mod 10^(k-1)) + p2 * 2^e
348  // = B[0] * 10^(k-1) + ((p1 mod 10^(k-1)) * 2^-e + p2) * 2^e
349  // = B[0] * 10^(k-1) + ( rest) * 2^e
350  //
351  // and generate the digits d of p1 from left to right:
352  //
353  // p1 = (B[0]B[1]...B[L -1]B[L]B[L+1] ...B[k-2]B[k-1])_10
354  // = (B[0]B[1]...B[L -1])_10 * 10^(k-L) + (B[L ]...B[k-2]B[k-1])_10 (L = 1...k)
355  // = (B[0]B[1]...B[k-n-1])_10 * 10^(n ) + (B[k-n ]...B[k-2]B[k-1])_10 (n = k...1)
356 
357  bool cut_early = delta >= p2;
358  uint32_t length = kappa;
359 
360  if (cut_early) {
361  uint32_t p1_remainder = 0;
362  uint32_t ten_n = 1;
363  unsigned power = 0;
364  uint32_t delta_shifted = (delta - p2) >> e_shift; // valid due to cut_early
365  bool check_increase_power = true;
366 
367  if (delta_shifted > 0) {
368  power = base::CountDecimalDigit32(delta_shifted);
369  ten_n = base::Power10(power);
370  p1_remainder = p1 % ten_n;
371  if (p1_remainder > delta_shifted) {
372  --power;
373  ten_n /= 10;
374  p1_remainder %= ten_n;
375  check_increase_power = false;
376  }
377  DCHECK_LE(p1_remainder, delta_shifted);
378  p1 /= ten_n;
379  length -= power;
380  }
381 
382  if (check_increase_power) {
383  while (p1 % 10 == 0) {
384  ++power;
385  ten_n *= 10;
386  p1 /= 10;
387  --length;
388  }
389  }
390 
391  SetDigits(p1, length, buffer);
392 
393  // Found V = buffer * 10^n, with w- <= V <= w+.
394  // And V is correctly rounded.
395  //
396  decimal_exponent += power;
397  if (dist > p2) {
398  // We may now just stop. But instead look if the buffer could be
399  // decremented to bring V closer to w.
400  //
401  // 10^n is now 1 ulp in the decimal representation V.
402  //
403  // The rounding procedure works with DiyFp's with an implicit
404  // exponent e.
405  //
406  // 10^n = ten_n * 2^e = (10^n * 2^-e) * 2^e
407  //
408  // Note:
409  // n has been decremented above, i.e. pow10 = 10^n
410  //
411 
412  uint64_t rest = (uint64_t(p1_remainder) << e_shift) + p2;
413  uint64_t ten_n64 = uint64_t(ten_n) << e_shift;
414 
415  while (rest < dist && delta - rest >= ten_n64 &&
416  ten_n64 / 2 < (dist - rest)) {
417  DCHECK_NE('0', buffer[length-1]);
418  buffer[length-1]--;
419  rest += ten_n64;
420  }
421  }
422 
423  return std::pair<unsigned, int>(length, decimal_exponent);
424  }
425 
426  SetDigits(p1, length, buffer);
427  assert(p2 != 0);
428  // (otherwise the loop above would have been exited with rest <= delta)
429 
430  //
431  // 2.
432  // The digits of the integral part have been generated:
433  //
434  // w+ = d[k-1]...d[1]d[0] + p2 * 2^e = buffer + p2 * 2^e
435  //
436  // Now generate the digits of the fractional part p2 * 2^e.
437  //
438  // Note:
439  // No decimal point is generated: the exponent is adjusted instead.
440  //
441  // p2 actually represents the fraction
442  //
443  // p2 * 2^e
444  // = p2 / 2^-e
445  // = d[-1] / 10^1 + d[-2] / 10^2 + d[-3] / 10^3 + ... + d[-m] / 10^m
446  //
447  // or
448  //
449  // 10 * p2 / 2^-e = d[-1] + (d[-2] / 10^1 + ... + d[-m] / 10^(m-1))
450  //
451  // and the digits can be obtained from left to right by
452  //
453  // (10 * p2) div 2^-e = d[-1]
454  // (10 * p2) mod 2^-e = d[-2] / 10^1 + ... + d[-m] / 10^(m-1)
455  //
456  DCHECK_GT(p2, delta);
457  int m = 0;
458  for (;;) {
459  // Invariant:
460  // 1. w+ = buffer * 10^m + 10^m * p2 * 2^e (Note: m <= 0)
461 
462  // p2 * 10 < 2^60 * 10 < 2^60 * 2^4 = 2^64,
463  // so the multiplication by 10 does not overflow.
464  DCHECK_LE(p2, e_mask);
465  p2 *= 10;
466 
467  uint64_t const d = p2 >> e_shift; // = p2 div 2^-e
468  uint64_t const r = p2 & e_mask; // = p2 mod 2^-e
469 
470  // w+ = buffer * 10^m + 10^m * p2 * 2^e
471  // = buffer * 10^m + 10^(m-1) * (10 * p2 ) * 2^e
472  // = buffer * 10^m + 10^(m-1) * (d * 2^-e + r) * 2^e
473  // = buffer * 10^m + 10^(m-1) * d + 10^(m-1) * r * 2^e
474  // = (buffer * 10 + d) * 10^(m-1) + 10^(m-1) * r * 2^e
475 
476  assert(d <= 9);
477  buffer[length++] = kDigits[d]; // buffer := buffer * 10 + d
478 
479  // w+ = buffer * 10^(m-1) + 10^(m-1) * r * 2^e
480 
481  p2 = r;
482  m -= 1;
483 
484  // w+ = buffer * 10^m + 10^m * p2 * 2^e
485  //
486  // Invariant (1) restored.
487 
488  // p2 is now scaled by 10^(-m) since it repeatedly is multiplied by 10.
489  // To keep the units in sync, delta and dist need to be scaled too.
490  delta *= 10;
491  dist *= 10;
492 
493  uint64_t const rest = p2;
494 
495  // Check if enough digits have been generated.
496  if (rest <= delta) {
497  decimal_exponent += m;
498 
499  // ten_m represents 10^m as a Fp with an exponent e.
500  //
501  // Note: m < 0
502  //
503  // Note:
504  // delta and dist are now scaled by 10^(-m) (they are repeatedly
505  // multiplied by 10) and we need to do the same with ten_m.
506  //
507  // 10^(-m) * 10^m = 10^(-m) * ten_m * 2^e
508  // = (10^(-m) * 10^m * 2^-e) * 2^e
509  // = 2^-e * 2^e
510  //
511  // one.f = 2^-e and the exponent e is implicit.
512  //
513  uint64_t const ten_m = 1ULL << e_shift;
514  Grisu2Round(buffer[length-1], dist, delta, rest, ten_m);
515  return std::pair<unsigned, int>(length, decimal_exponent);
516  }
517  }
518 
519  // By construction this algorithm generates the shortest possible decimal
520  // number (Loitsch, Theorem 6.2) which rounds back to w.
521  // For an input number of precision p, at least
522  //
523  // N = 1 + ceil(p * log_10(2))
524  //
525  // decimal digits are sufficient to identify all binary floating-point
526  // numbers (Matula, "In-and-Out conversions").
527  // This implies that the algorithm does not produce more than N decimal
528  // digits.
529  //
530  // N = 17 for p = 53 (IEEE double precision)
531  // N = 9 for p = 24 (IEEE single precision)
532 }
533 
534 } // namespace
535 
536 // v = buf * 10^decimal_exponent
537 // len is the length of the buffer (number of decimal digits)
538 std::pair<unsigned, int> Grisu2(Fp m_minus, Fp v, Fp m_plus, char* buf) {
539  assert(v.e == m_minus.e);
540  assert(v.e == m_plus.e);
541  //
542  // --------(-----------------------+-----------------------)-------- (A)
543  // m- v m+
544  //
545  // --------------------(-----------+-----------------------)-------- (B)
546  // m- v m+
547  //
548  // First scale v (and m- and m+) such that the exponent is in the range
549  // [alpha, beta].
550  //
551 
552  CachedPower const cached = GetCachedPowerForBinaryExponent(m_plus.e);
553 
554  Fp const c_minus_k(cached.f, cached.e); // = c ~= 10^k
555 
556  Fp const w = v.Mul(c_minus_k); // Exponent of the products is v.e + c_minus_k.e + q
557  Fp const w_minus = m_minus.Mul(c_minus_k);
558  Fp const w_plus = m_plus.Mul(c_minus_k);
559 
560  //
561  // ----(---+---)---------------(---+---)---------------(---+---)----
562  // w- w w+
563  // = c*m- = c*v = c*m+
564  //
565  // Fp::Mul rounds its result and c_minus_k is approximated too. w (as well
566  // as w- and w+) are now off by a small amount.
567  // In fact:
568  //
569  // w - v * 10^k < 1 ulp
570  //
571  // To account for this inaccuracy, add resp. subtract 1 ulp.
572  //
573  // --------+---[---------------(---+---)---------------]---+--------
574  // w- M- w M+ w+
575  //
576  // Now any number in [M-, M+] (bounds included) will round to w when input,
577  // regardless of how the input rounding algorithm breaks ties.
578  //
579  // And DigitGen generates the shortest possible such number in [M-, M+].
580  // This does not mean that Grisu2 always generates the shortest possible
581  // number in the interval (m-, m+).
582  //
583  Fp const M_minus = Fp(w_minus.f + 1, w_minus.e);
584  Fp const M_plus = Fp(w_plus.f - 1, w_plus.e);
585 
586  return Grisu2DigitGen(buf, -cached.k, M_minus, w, M_plus);
587 }
588 
589 //------------------------------------------------------------------------------
590 //
591 //------------------------------------------------------------------------------
592 
593 // Returns a pointer to the element following the exponent
594 char* AppendExponent(char* buf, int e) {
595  static constexpr char const* const kDigits = "0123456789";
596  static constexpr char const* const kDigits100 =
597  "00010203040506070809"
598  "10111213141516171819"
599  "20212223242526272829"
600  "30313233343536373839"
601  "40414243444546474849"
602  "50515253545556575859"
603  "60616263646566676869"
604  "70717273747576777879"
605  "80818283848586878889"
606  "90919293949596979899";
607 
608  assert(e > -1000);
609  assert(e < 1000);
610 
611  if (e < 0)
612  *buf++ = '-', e = -e;
613  else
614  *buf++ = '+';
615 
616  uint32_t const k = static_cast<uint32_t>(e);
617  if (k < 10) {
618  // *buf++ = kDigits[0];
619  *buf++ = kDigits[k];
620  } else if (k < 100) {
621  *buf++ = kDigits100[2 * k + 0];
622  *buf++ = kDigits100[2 * k + 1];
623  } else {
624  uint32_t const q = k / 100;
625  uint32_t const r = k % 100;
626  *buf++ = kDigits[q];
627  *buf++ = kDigits100[2 * r + 0];
628  *buf++ = kDigits100[2 * r + 1];
629  }
630 
631  return buf;
632 }
633 
634 char* FormatBuffer(char* buf, int k, int n) {
635  // v = digits * 10^(n-k)
636  // k is the length of the buffer (number of decimal digits)
637  // n is the position of the decimal point relative to the start of the buffer.
638  //
639  // Format the decimal floating-number v in the same way as JavaScript's ToString
640  // applied to number type.
641  //
642  // See:
643  // https://tc39.github.io/ecma262/#sec-tostring-applied-to-the-number-type
644 
645  if (k <= n && n <= 21) {
646  // digits[000]
647 
648  std::memset(buf + k, '0', static_cast<size_t>(n - k));
649  // if (trailing_dot_zero)
650  //{
651  // buf[n++] = '.';
652  // buf[n++] = '0';
653  //}
654  return buf + n; // (len <= 21 + 2 = 23)
655  }
656 
657  if (0 < n && n <= 21) {
658  // dig.its
659  assert(k > n);
660 
661  std::memmove(buf + (n + 1), buf + n, static_cast<size_t>(k - n));
662  buf[n] = '.';
663  return buf + (k + 1); // (len == k + 1 <= 18)
664  }
665 
666  if (-6 < n && n <= 0) {
667  // 0.[000]digits
668 
669  std::memmove(buf + (2 + -n), buf, static_cast<size_t>(k));
670  buf[0] = '0';
671  buf[1] = '.';
672  std::memset(buf + 2, '0', static_cast<size_t>(-n));
673  return buf + (2 + (-n) + k); // (len <= k + 7 <= 24)
674  }
675 
676  if (k == 1) {
677  // dE+123
678 
679  buf += 1; // (len <= 1 + 5 = 6)
680  } else {
681  // d.igitsE+123
682 
683  std::memmove(buf + 2, buf + 1, static_cast<size_t>(k - 1));
684  buf[1] = '.';
685  buf += 1 + k; // (len <= k + 6 = 23)
686  }
687 
688  *buf++ = 'e';
689  return AppendExponent(buf, n - 1);
690 }
691 
692 } // namespace dtoa
693 
694 } // namespace util