float2decimal.h
1 // Copyright 2017 Alexander Bolz
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 // SOFTWARE.
20 
21 // Implements the Grisu2 algorithm for binary to decimal floating-point
22 // conversion.
23 //
24 // This implementation is a slightly modified version of the reference
25 // implementation by Florian Loitsch which can be obtained from
26 // http://florian.loitsch.com/publications (bench.tar.gz)
27 // The original license can be found at the end of this file.
28 //
29 // References:
30 //
31 // [1] Loitsch, "Printing Floating-Point Numbers Quickly and Accurately with Integers",
32 // Proceedings of the ACM SIGPLAN 2010 Conference on Programming Language Design and
33 // Implementation, PLDI 2010
34 // [2] Burger, Dybvig, "Printing Floating-Point Numbers Quickly and Accurately",
35 // Proceedings of the ACM SIGPLAN 1996 Conference on Programming Language Design and
36 // Implementation, PLDI 1996
37 
38 // Header-only U+1F926 U+1F937
39 
40 #pragma once
41 
42 #include <cassert>
43 #include <cstring>
44 #include <utility>
45 
46 #include "util/math/ieeefloat.h"
47 #include "base/bits.h"
48 
49 namespace util {
50 
51 namespace dtoa {
52 
53 template <typename Float> char* ToString(Float value, char* dest);
54 
55 // TODO: to implement it.
56 // Any non-special float number will have at most 17 significant decimal digits.
57 // See https://en.wikipedia.org/wiki/Double-precision_floating-point_format#IEEE_754_double-precision_binary_floating-point_format:_binary64
58 // So for non-nans we should always be able to convert float to int64 integer/exponent pair
59 // without loosing precision.
60 // The exception is negative zero floating number - this will be translated to just 0.
61 // The function returns integer val, it's decimal length not including sign and the exponent
62 // such that the resulting float := val * 10^exponent.
63 // For example, for -0.05 it returns: (-5, -2, 1)
64 template <typename Float> bool ToDecimal(Float value, int64_t* val, int16_t* exponent,
65  uint16_t* decimal_len);
66 
67 //------------------------------------------------------------------------------
68 //
69 //------------------------------------------------------------------------------
70 
71 struct Fp { // f * 2^e
72  static constexpr int const kPrecision = 64; // = q
73 
74  uint64_t f;
75  int e;
76 
77  constexpr Fp() : f(0), e(0) {}
78  constexpr Fp(uint64_t f_, int e_) : f(f_), e(e_) {}
79 
80  // Returns *this - y.
81  // Requires: *this.e == y.e and x.f >= y.f
82  Fp Sub(Fp y) const {
83  assert(e == y.e);
84  assert(f >= y.f);
85 
86  return Fp(f - y.f, e);
87  }
88 
89  // Returns *this * y.
90  // The result is rounded. (Only the upper q bits are returned.)
91  Fp Mul(Fp y) const;
92 
93  // Normalize x such that the significand is >= 2^(q-1).
94  // Requires: x.f != 0
95  void Normalize() {
96  unsigned const leading_zeros = Bits::CountLeadingZeros64(f);
97  f <<= leading_zeros;
98  e -= leading_zeros;
99  }
100 
101  // Normalize x such that the result has the exponent E.
102  // Requires: e >= this->e and the upper (e - this->e) bits of this->f must be zero.
103  void NormalizeTo(int e);
104 };
105 
106 // If we compute float := f * 2 ^ (e - bias), where f, e are integers (no implicit 1. point)
107  // In that case we need to divide f by 2 ^ kSignificandLen, which in fact increases exp bias:
108  // res : = (f / 2^kSignificandLen) * 2^(1 - kExponentBias) =
109  // f * 2 (1 - kExponentBias - kSignificandLen)
110  // So we redefine kExpBias for this equation.
111 template<typename Float> struct FpTraits { // explicit division of mantissa by 2^mantissalen.
112  using IEEEType = IEEEFloat<Float>;
113  static constexpr int const kExpBias = IEEEType::kExponentBias + IEEEType::kSignificandLen;
114 
115  static constexpr int const kDenormalExp = 1 - kExpBias;
116  static constexpr int const kMaxExp = IEEEType::kMaxExponent - kExpBias;
117 
118 
119  static Float FromFP(uint64_t f, int e) {
120  while (f > IEEEType::kHiddenBit + IEEEType::kSignificandMask) {
121  f >>= 1;
122  e++;
123  }
124 
125  if (e >= kMaxExp) {
126  return std::numeric_limits<Float>::infinity();
127  }
128 
129  if (e < kDenormalExp) {
130  return 0.0;
131  }
132 
133  while (e > kDenormalExp && (f & IEEEType::kHiddenBit) == 0) {
134  f <<= 1;
135  e--;
136  }
137 
138  uint64_t biased_exponent;
139  if (e == kDenormalExp && (f & IEEEType::kHiddenBit) == 0)
140  biased_exponent = 0;
141  else
142  biased_exponent = static_cast<uint64_t>(e + kExpBias);
143 
144  return IEEEType(biased_exponent, f).value;
145  }
146 };
147 
148 // Returns (filled buffer len, decimal_exponent) pair.
149 std::pair<unsigned, int> Grisu2(Fp m_minus, Fp v, Fp m_plus, char* buf);
150 
151 //------------------------------------------------------------------------------
152 //
153 //------------------------------------------------------------------------------
154 
155 struct BoundedFp {
156  Fp w;
157  Fp minus;
158  Fp plus;
159 };
160 
161 //
162 // Computes the boundaries m- and m+ of the floating-point value v.
163 //
164 // Determine v- and v+, the floating-point predecessor and successor if v,
165 // respectively.
166 //
167 // v- = v - 2^e if f != 2^(p-1) or e != e_min (A)
168 // = v - 2^(e-1) if f == 2^(p-1) and e > e_min (B)
169 //
170 // v+ = v + 2^e
171 //
172 // Let m- = (v- + v) / 2 and m+ = (v + v+) / 2. All real numbers _strictly_
173 // between m- and m+ round to v, regardless of how the input rounding algorithm
174 // breaks ties.
175 //
176 // ---+-------------+-------------+-------------+-------------+--- (A)
177 // v- m- v m+ v+
178 //
179 // -----------------+------+------+-------------+-------------+--- (B)
180 // v- m- v m+ v+
181 //
182 // Note that m- and m+ are (by definition) not representable with precision p
183 // and we therefore need some extra bits of precision.
184 //
185 template <typename Float>
186 inline BoundedFp ComputeBoundedFp(Float v_ieee) {
187  using IEEEType = IEEEFloat<Float>;
188 
189  //
190  // Convert the IEEE representation into a DiyFp.
191  //
192  // If v is denormal:
193  // value = 0.F * 2^(1 - E_bias) = (F) * 2^(1 - E_bias - (p-1))
194  // If v is normalized:
195  // value = 1.F * 2^(E - E_bias) = (2^(p-1) + F) * 2^(E - E_bias - (p-1))
196  //
197  IEEEType const v_ieee_bits(v_ieee);
198 
199  // biased exponent.
200  uint64_t const E = v_ieee_bits.ExponentBits();
201  uint64_t const F = v_ieee_bits.SignificandBits();
202 
203  Fp v = (E == 0) // denormal?
205  : Fp(IEEEType::kHiddenBit + F, static_cast<int>(E) - FpTraits<Float>::kExpBias);
206 
207  //
208  // v+ = v + 2^e = (f + 1) * 2^e and therefore
209  //
210  // m+ = (v + v+) / 2
211  // = (2*f + 1) * 2^(e-1)
212  //
213  Fp m_plus = Fp(2 * v.f + 1, v.e - 1);
214 
215  //
216  // If f != 2^(p-1), then v- = v - 2^e = (f - 1) * 2^e and
217  //
218  // m- = (v- + v) / 2
219  // = (2*f - 1) * 2^(e-1)
220  //
221  // If f = 2^(p-1), then the next smaller _normalized_ floating-point number
222  // is actually v- = v - 2^(e-1) = (2^p - 1) * 2^(e-1) and therefore
223  //
224  // m- = (4*f - 1) * 2^(e-2)
225  //
226  // The exception is the smallest normalized floating-point number
227  // v = 2^(p-1) * 2^e_min. In this case the predecessor is the largest
228  // denormalized floating-point number: v- = (2^(p-1) - 1) * 2^e_min and then
229  //
230  // m- = (2*f - 1) * 2^(e-1)
231  //
232  // If v is denormal, v = f * 2^e_min and v- = v - 2^e = (f - 1) * 2^e and
233  // again
234  //
235  // m- = (2*f - 1) * 2^(e-1)
236  //
237  // Note: 0 is not a valid input for Grisu and in case v is denormal:
238  // f != 2^(p-1).
239  //
240  // For IEEE floating-point numbers not equal to 0, the condition f = 2^(p-1)
241  // is equivalent to F = 0, and for the smallest normalized number E = 1.
242  // For denormals E = 0 (and F != 0).
243  //SignificandBits
244  Fp m_minus = (F == 0 && E > 1) ? Fp(4 * v.f - 1, v.e - 2) : Fp(m_plus.f - 2, m_plus.e);
245 
246  //
247  // Determine the normalized w+ = m+.
248  //
249  m_plus.Normalize();
250  v.Normalize();
251 
252  //
253  // Determine w- = m- such that e_(w-) = e_(w+).
254  //
255  m_minus.NormalizeTo(m_plus.e);
256 
257  return BoundedFp{v, m_minus, m_plus};
258 }
259 
260 char* FormatBuffer(char* buf, int k, int n);
261 
262 //
263 // Generates a decimal representation of the input floating-point number V in
264 // BUF.
265 //
266 // The result is formatted like JavaScript's ToString applied to a number type.
267 // Except that:
268 // An argument representing an infinity is converted to "inf" or "-inf".
269 // An argument representing a NaN is converted to "nan".
270 //
271 // This function never writes more than 25 characters to BUF and returns an
272 // iterator pointing past-the-end of the decimal representation.
273 // The result is guaranteed to round-trip (when read back by a correctly
274 // rounding implementation.)
275 //
276 // Note:
277 // The result is not null-terminated.
278 //
279 
280 // next should point to at least 25 bytes.
281 template <typename Float> char* ToString(Float value, char* dest) {
282  using IEEEType = IEEEFloat<Float>;
283  static_assert(Fp::kPrecision >= IEEEType::kPrecision + 3, "insufficient precision");
284 
285  constexpr char kNaNString[] = "NaN"; // assert len <= 25
286  constexpr char kInfString[] = "Infinity"; // assert len <= 24
287  static_assert(sizeof(kNaNString) == 4, "");
288 
289  IEEEType const v(value);
290  // assert(!v.IsNaN());
291  // assert(!v.IsInf());
292 
293  if (v.IsNaN()) {
294  std::memcpy(dest, kNaNString, sizeof(kNaNString) - 1);
295  return dest + sizeof(kNaNString) - 1;
296  }
297  if (v.IsNegative()) {
298  *dest++ = '-';
299  }
300 
301  if (v.IsInf()) {
302  std::memcpy(dest, kInfString, sizeof(kInfString) - 1);
303  return dest + sizeof(kInfString) - 1;
304  }
305 
306  if (v.IsZero()) {
307  *dest++ = '0';
308  } else {
309  BoundedFp w = ComputeBoundedFp(v.Abs());
310 
311  // Compute the decimal digits of v = digits * 10^decimal_exponent.
312  // len is the length of the buffer, i.e. the number of decimal digits
313  std::pair<unsigned, int> res = Grisu2(w.minus, w.w, w.plus, dest);
314 
315  // Compute the position of the decimal point relative to the start of the buffer.
316  int n = res.first + res.second;
317 
318  dest = FormatBuffer(dest, res.first, n);
319  // (len <= 1 + 24 = 25)
320  }
321 
322  return dest;
323 }
324 
325 template <typename Float> bool ToDecimal(Float value, int64_t* val, int16_t* exponent,
326  uint16_t* decimal_len) {
327  static_assert(std::is_floating_point<Float>::value, "");
328 
329  using IEEEType = IEEEFloat<Float>;
330  const IEEEType v(value);
331  if (v.IsNaN() || v.IsInf())
332  return false;
333  int64_t decimal = 0;
334 
335  if (v.IsZero()) {
336  *exponent = 0;
337  *decimal_len = 1;
338  } else {
339  BoundedFp w = ComputeBoundedFp(v.Abs());
340 
341  // Compute the decimal digits of v = digits * 10^decimal_exponent.
342  // len is the length of the buffer, i.e. the number of decimal digits
343  char dest[20];
344 
345  std::pair<unsigned, int> res = Grisu2(w.minus, w.w, w.plus, dest);
346  assert(res.first < 18);
347  for (unsigned i = 0; i < res.first; ++i) {
348  decimal = decimal * 10 + (dest[i] - '0');
349  }
350  *decimal_len = res.first;
351  if (v.IsNegative())
352  decimal = -decimal;
353  *exponent = res.second;
354  }
355  *val = decimal;
356 
357  return true;
358 }
359 
360 inline Fp Fp::Mul(Fp y) const {
361 // Computes:
362 // f = round((x.f * y.f) / 2^q)
363 // e = x.e + y.e + q
364  __extension__ using Uint128 = unsigned __int128;
365 
366  Uint128 const p = Uint128{f} * Uint128{y.f};
367 
368  uint64_t h = static_cast<uint64_t>(p >> 64);
369  uint64_t l = static_cast<uint64_t>(p);
370  h += l >> 63; // round, ties up: [h, l] += 2^q / 2
371 
372  return Fp(h, e + y.e + 64);
373 }
374 
375 inline void Fp::NormalizeTo(int new_e) {
376  int const delta = e - new_e;
377 
378  assert(delta >= 0);
379  assert(((this->f << delta) >> delta) == this->f);
380  f <<= delta;
381  e = new_e;
382 }
383 
384 } // namespace dtoa
385 } // namespace util
386 
387 // http://florian.loitsch.com/publications (bench.tar.gz)
388 //
389 // Copyright (c) 2009 Florian Loitsch
390 //
391 // Permission is hereby granted, free of charge, to any person
392 // obtaining a copy of this software and associated documentation
393 // files (the "Software"), to deal in the Software without
394 // restriction, including without limitation the rights to use,
395 // copy, modify, merge, publish, distribute, sublicense, and/or sell
396 // copies of the Software, and to permit persons to whom the
397 // Software is furnished to do so, subject to the following
398 // conditions:
399 //
400 // The above copyright notice and this permission notice shall be
401 // included in all copies or substantial portions of the Software.
402 //
403 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
404 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
405 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
406 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
407 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
408 // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
409 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
410 // OTHER DEALINGS IN THE SOFTWARE.