// The next line carries two project includes and then opens the forward
// declaration of ToString<Float>.
// ToString takes a floating-point `value` and a caller-supplied character
// buffer `dest`, and returns a char*.
// NOTE(review): the required capacity of `dest` and whether the output is
// NUL-terminated cannot be read off this declaration -- confirm against the
// definition and its callers.
46 #include "util/math/ieeefloat.h" 47 #include "base/bits.h" 53 template <
typename Float>
char* ToString(Float value,
char* dest);
// Forward declaration of ToDecimal<Float>: takes `value` plus three output
// pointers (`val`, `exponent`, `decimal_len`) and returns bool.
// NOTE(review): the meaning of the bool result and the exact contract of the
// three outputs are not stated here -- confirm against the definition.
64 template <
typename Float>
bool ToDecimal(Float value, int64_t* val, int16_t* exponent,
65 uint16_t* decimal_len);
// Precision constant, fixed at 64. It is referenced as Fp::kPrecision by the
// static_assert in ToString, which requires it to be at least
// IEEEType::kPrecision + 3.
// NOTE(review): presumably the bit width of the significand member `f`
// (the two-argument constructor takes it as uint64_t) -- confirm.
72 static constexpr
int const kPrecision = 64;
// Default constructor: initializes both members, f and e, to 0.
77 constexpr
Fp() : f(0), e(0) {}
// Value constructor: stores f_ into member f and e_ into member e.
// NOTE(review): presumably the pair denotes the number f * 2^e (consistent
// with how Mul combines exponents) -- confirm against the class comment.
78 constexpr
Fp(uint64_t f_,
int e_) : f(f_), e(e_) {}
86 return Fp(f - y.f, e);
96 unsigned const leading_zeros = Bits::CountLeadingZeros64(f);
// Declaration of NormalizeTo, defined out of line later in this file as
// Fp::NormalizeTo(int new_e). The parameter is named `e` here but `new_e`
// in the definition; it is the target exponent, not the member `e`.
103 void NormalizeTo(
int e);
// Exponent bias used when converting between (f, e) and IEEE bit fields:
// the IEEE exponent bias plus the significand length. FromFP adds it to `e`
// to obtain the biased exponent.
113 static constexpr
int const kExpBias = IEEEType::kExponentBias + IEEEType::kSignificandLen;
// Exponent value compared against in FromFP's denormal handling:
// 1 - kExpBias.
115 static constexpr
int const kDenormalExp = 1 - kExpBias;
// IEEEType::kMaxExponent shifted by the same bias.
// NOTE(review): its use is not visible in these lines -- presumably the
// overflow threshold in FromFP; confirm.
116 static constexpr
int const kMaxExp = IEEEType::kMaxExponent - kExpBias;
// FromFP: builds a Float from a significand `f` and exponent `e` and returns
// the `value` member of IEEEType(biased_exponent, f).
// NOTE(review): the bodies of the loops and branches below are not visible
// in this listing; the comments describe only the visible conditions.
119 static Float FromFP(uint64_t f,
int e) {
// Loop while f is larger than kHiddenBit + kSignificandMask.
120 while (f > IEEEType::kHiddenBit + IEEEType::kSignificandMask) {
// One path returns positive infinity (its guarding condition is not visible).
126 return std::numeric_limits<Float>::infinity();
// Separate handling when e is below kDenormalExp.
129 if (e < kDenormalExp) {
// Loop while e is above kDenormalExp and the hidden bit of f is clear.
133 while (e > kDenormalExp && (f & IEEEType::kHiddenBit) == 0) {
138 uint64_t biased_exponent;
// Case: e equals kDenormalExp and the hidden bit is clear. The value
// assigned to biased_exponent in this case is not visible.
139 if (e == kDenormalExp && (f & IEEEType::kHiddenBit) == 0)
// Visible assignment: biased exponent is e + kExpBias (presumably the
// else-branch of the test above -- confirm).
142 biased_exponent = static_cast<uint64_t>(e + kExpBias);
144 return IEEEType(biased_exponent, f).value;
// Declaration of Grisu2: takes three Fp values (m_minus, v, m_plus) and a
// character buffer, and returns a pair<unsigned, int>.
// Callers in this file read `first` as the number of digit characters placed
// in the buffer (loop bound over dest[i], stored to *decimal_len) and
// `second` as an exponent (stored to *exponent).
// NOTE(review): the required buffer capacity is not visible here.
149 std::pair<unsigned, int> Grisu2(
Fp m_minus,
Fp v,
Fp m_plus,
char* buf);
// ComputeBoundedFp: from an IEEE value, derives `v` together with a lower
// (m_minus) and upper (m_plus) neighbour as Fp values; callers read the
// result through its members .minus, .w and .plus.
// NOTE(review): the construction of `v` and the return statement are not
// visible in this listing.
185 template <
typename Float>
186 inline BoundedFp ComputeBoundedFp(Float v_ieee) {
197 IEEEType
const v_ieee_bits(v_ieee);
// Raw exponent field and significand field of the input.
200 uint64_t
const E = v_ieee_bits.ExponentBits();
201 uint64_t
const F = v_ieee_bits.SignificandBits();
// m_plus has significand 2*v.f + 1 at exponent v.e - 1. If Fp denotes
// f * 2^e (assumption, TODO confirm), that is v plus half a unit of v's
// last place.
213 Fp m_plus =
Fp(2 * v.f + 1, v.e - 1);
// When the significand field is zero and the exponent field is above 1, the
// lower neighbour is (4*v.f - 1) at exponent v.e - 2; otherwise it is
// m_plus.f - 2 at m_plus's exponent, i.e. (2*v.f - 1) at exponent v.e - 1.
244 Fp m_minus = (F == 0 && E > 1) ?
Fp(4 * v.f - 1, v.e - 2) :
Fp(m_plus.f - 2, m_plus.e);
// Bring m_minus to the same exponent as m_plus.
255 m_minus.NormalizeTo(m_plus.e);
// Declaration of FormatBuffer: takes a character buffer and two ints (k, n)
// and returns a char*.
// ToString calls it with the buffer Grisu2 filled, k = Grisu2's `first`, and
// n = `first` + `second`, and continues from the returned pointer.
// NOTE(review): the exact output format is not visible here.
260 char* FormatBuffer(
char* buf,
int k,
int n);
// Definition of ToString<Float>. The visible lines show a compile-time
// precision check, dedicated "NaN" and "Infinity" outputs, a sign test, and
// the finite path ComputeBoundedFp -> Grisu2 -> FormatBuffer, all writing
// into `dest`.
// NOTE(review): the conditions guarding the NaN/Infinity branches, the body
// of the sign branch and the final return are not visible in this listing.
281 template <
typename Float>
char* ToString(Float value,
char* dest) {
282 using IEEEType = IEEEFloat<Float>;
// Fp::kPrecision must be at least IEEEType::kPrecision + 3.
283 static_assert(Fp::kPrecision >= IEEEType::kPrecision + 3,
"insufficient precision");
285 constexpr
char kNaNString[] =
"NaN";
286 constexpr
char kInfString[] =
"Infinity";
// sizeof includes the terminating NUL, so the 3-character "NaN" is 4 bytes.
287 static_assert(
sizeof(kNaNString) == 4,
"");
289 IEEEType
const v(value);
// Copy "NaN" without its terminating NUL and return the position just past
// the copied characters.
294 std::memcpy(dest, kNaNString,
sizeof(kNaNString) - 1);
295 return dest +
sizeof(kNaNString) - 1;
// Sign handling (branch body not visible).
297 if (v.IsNegative()) {
// Same pattern for "Infinity": copy without the NUL, return one past the end.
302 std::memcpy(dest, kInfString,
sizeof(kInfString) - 1);
303 return dest +
sizeof(kInfString) - 1;
// Finite path works on the absolute value.
309 BoundedFp w = ComputeBoundedFp(v.Abs());
// Grisu2 receives `dest` as its output buffer.
313 std::pair<unsigned, int> res = Grisu2(w.minus, w.w, w.plus, dest);
// n is the sum of both Grisu2 results.
316 int n = res.first + res.second;
// FormatBuffer works on the same buffer; `dest` is advanced to its result.
318 dest = FormatBuffer(dest, res.first, n);
// Definition of ToDecimal<Float>. Visible steps: require a floating-point
// Float at compile time, test for NaN/Inf, run Grisu2 on the absolute value,
// fold the produced digit characters into an integer, and store the digit
// count and exponent through the output pointers.
// NOTE(review): `dest` and `decimal` are declared on lines not visible here,
// as are the NaN/Inf outcome, any sign handling, the write to *val and the
// return statements.
325 template <
typename Float>
bool ToDecimal(Float value, int64_t* val, int16_t* exponent,
326 uint16_t* decimal_len) {
327 static_assert(std::is_floating_point<Float>::value,
"");
329 using IEEEType = IEEEFloat<Float>;
330 const IEEEType v(value);
// NaN and infinities take a separate path (its statement is not visible).
331 if (v.IsNaN() || v.IsInf())
339 BoundedFp w = ComputeBoundedFp(v.Abs());
345 std::pair<unsigned, int> res = Grisu2(w.minus, w.w, w.plus, dest);
// Debug-only check that fewer than 18 digits were produced (presumably so
// the accumulated value fits in int64_t -- confirm).
346 assert(res.first < 18);
// Convert the ASCII digits in dest[0 .. res.first) to an integer, most
// significant digit first.
347 for (
unsigned i = 0; i < res.first; ++i) {
348 decimal = decimal * 10 + (dest[i] -
'0');
// Number of digits produced by Grisu2.
350 *decimal_len = res.first;
// Second Grisu2 result, narrowed from int to int16_t by the assignment.
353 *exponent = res.second;
// Fp::Mul: multiplies this Fp by y. The two 64-bit significands are
// multiplied into a full 128-bit product; the result keeps the high 64 bits
// as its significand and uses exponent e + y.e + 64, the +64 accounting for
// the discarded low word.
// NOTE(review): the low word `l` is computed but its use is not visible in
// this listing (presumably rounding of `h`) -- confirm.
360 inline Fp Fp::Mul(Fp y)
const {
// unsigned __int128 is a compiler extension (GCC/Clang); __extension__
// marks it as intentional.
364 __extension__
using Uint128 =
unsigned __int128;
366 Uint128
const p = Uint128{f} * Uint128{y.f};
// Split the product into its high and low 64-bit halves.
368 uint64_t h = static_cast<uint64_t>(p >> 64);
369 uint64_t l = static_cast<uint64_t>(p);
372 return Fp(h, e + y.e + 64);
// Fp::NormalizeTo: re-expresses this Fp at exponent new_e. The visible lines
// compute delta = e - new_e and assert that shifting f left by delta and
// back right reproduces f, i.e. no set bits would be shifted out.
// NOTE(review): the statements that actually update f and e are not visible
// in this listing (presumably f <<= delta; e = new_e) -- confirm.
375 inline void Fp::NormalizeTo(
int new_e) {
376 int const delta = e - new_e;
379 assert(((this->f << delta) >> delta) == this->f);