double_compressor.h
1 // Copyright 2017, Beeri 15. All rights reserved.
2 // Author: Roman Gershman (romange@gmail.com)
3 //
4 #pragma once
5 
6 #include <cstdint>
7 #include <map>
8 #include <memory>
9 
10 namespace util {
11 
13  public:
// Limits of a single raw block: its size in bytes and the matching count of doubles
// (2^13 when sizeof(double) is 8).
enum {
  BLOCK_MAX_BYTES = 1U << 16,
  BLOCK_MAX_LEN = BLOCK_MAX_BYTES / sizeof(double)
};

// COMPRESS_BLOCK_BOUND: upper bound on what Commit finally writes.
// DECIMAL_HEADER_MAX_SIZE: the header term used in the worst-case size computations below.
enum {
  COMPRESS_BLOCK_BOUND = (1U << 16) + 3,
  DECIMAL_HEADER_MAX_SIZE = 14
};

// Worst-case number of destination bytes Commit may need for sz values.
// All arithmetic is done in uint32_t.
static constexpr uint32_t CommitMaxSize(uint32_t sz) {
  return 8 * sz                      // 8 bytes per value
         + 8 * sz / 255 + 16         // lz4 space
         + 3
         + DECIMAL_HEADER_MAX_SIZE;  // header
}

// The same bound as CommitMaxSize, spelled out for a full block of BLOCK_MAX_LEN values.
enum {
  COMMIT_MAX_SIZE = 8 * BLOCK_MAX_LEN      // 8 bytes per value
                    + 8 * BLOCK_MAX_LEN / 255 + 16  // lz4 space
                    + 3
                    + DECIMAL_HEADER_MAX_SIZE       // header
};
24 
25  // dest must have room for at least CommitMaxSize(sz) bytes. The simpler approach is to
26  // always size it as COMMIT_MAX_SIZE, which accommodates BLOCK_MAX_LEN.
27  // Commit will finally write no more than COMPRESS_BLOCK_BOUND bytes even though it will use
28  // more space in between.
// NOTE(review): the return value is presumably the number of bytes written to dest, and sz is
// presumably a count of doubles in src - confirm both against the definition.
29  uint32_t Commit(const double* src, uint32_t sz, uint8_t* dest);
30 
31  private:
32  struct ExpInfo;  // forward declaration only; no definition appears in this header
// Ordered map from a 16-bit signed key to ExpInfo.
// NOTE(review): the key is presumably a decimal exponent, judging by the name - confirm.
33  typedef std::map<int16_t, ExpInfo> ExponentMap;
34 
// Private helpers, declared here without definitions.
// NOTE(review): their roles can only be guessed from the names (normalize the decimal forms of
// `count` doubles, pick parameters from an ExponentMap, emit doubles uncompressed) - confirm
// against the definitions before relying on this.
35  unsigned NormalizeDecimals(unsigned count, const double* dbl_src);
36  uint32_t Optimize(const ExponentMap& em);
37  uint32_t WriteRawDoubles(const double* src, uint32_t sz, uint8_t* dest);
38 
// One value held as an integer `val` together with an exponent `exp` and a length `dec_len`.
// NOTE(review): presumably the value is val * 10^exp and dec_len counts the decimal digits
// of val - confirm against the code that fills these fields.
struct __attribute__((aligned(4))) Decimal {
  int64_t val;
  int16_t exp;
  uint16_t dec_len;

  // Reports whether this decimal can be normalized against exp_reference.
  // A zero val always can. Any other value needs dec_len below 17, an exp that is not
  // smaller than exp_reference, and an exponent gap (exp - exp_reference) that does not
  // exceed 17 - dec_len.
  bool CanNormalize(int exp_reference) const {
    if (val == 0) {
      return true;
    }
    if (dec_len >= 17 || exp < exp_reference) {
      return false;
    }
    const int exp_gap = exp - exp_reference;
    const int spare_len = 17 - dec_len;
    return exp_gap <= spare_len;
  }
};
49 
// Per-block header fields.
// The field widths add up to 8 + 2 + 2 + 2 = 14 bytes, the same number as
// DECIMAL_HEADER_MAX_SIZE. NOTE(review): presumably that constant bounds what Serialize
// emits - confirm against the definition.
struct DecimalHeader {
  int64_t min_val;
  int16_t exponent;
  uint16_t lz4_size;
  uint16_t first_exception_index;

  // Declared here without definitions.
  // NOTE(review): presumably Serialize writes this header to dest and Parse reads it back
  // from src, returning the number of bytes consumed; the meaning of `flags` is not visible
  // in this header - confirm.
  void Serialize(uint8_t flags, uint8_t* dest);
  uint32_t Parse(uint8_t flags, const uint8_t* src);
};
58 
// Working storage: three arrays of BLOCK_MAX_LEN entries each (Decimal, double, int64_t)
// plus one DecimalHeader.
// NOTE(review): presumably dec holds the decimal form of each input value, exceptions the
// values stored as raw doubles, and normalized the integers handed to the compressor -
// confirm against the definitions that use them.
59  struct Aux {
60  Decimal dec[BLOCK_MAX_LEN];
61  double exceptions[BLOCK_MAX_LEN];
62  int64_t normalized[BLOCK_MAX_LEN];
63 
64  DecimalHeader header;
65  };
66 
// Aux is owned through a unique_ptr instead of being embedded by value.
// NOTE(review): where it gets allocated is not visible in this header.
67  std::unique_ptr<Aux> aux_;
// Gives DoubleDecompressor access to the private members declared above.
68  friend class DoubleDecompressor;
69 };
70 
72  public:
73  enum {BLOCK_MAX_LEN = DoubleCompressor::BLOCK_MAX_LEN};  // same value as the compressor's BLOCK_MAX_LEN, re-declared in this scope
74 
76 
77  // dest must accommodate at least BLOCK_MAX_LEN doubles.
78  // Returns -1 if it cannot decompress src because src_len is not the exact block size.
79  // Fully consumes a successfully decompressed block.
80  // On success returns how many doubles were written to dest.
81  int32_t Decompress(const uint8_t* src, uint32_t src_len, double* dest);
82 
// `header` must point to at least 3 readable bytes.
// Returns the size of the whole block, header included: bytes 1 and 2 are combined as a
// little-endian 16-bit length and 3 is added on top. Byte 0 is not read here.
static uint32_t BlockSize(const uint8_t* header) {
  const uint32_t low_byte = header[1];
  const uint32_t high_byte = header[2];
  const uint32_t stored_len = (high_byte << 8) | low_byte;
  return stored_len + 3;
}
88 
89  private:
// Working storage: a byte buffer of DoubleCompressor::BLOCK_MAX_BYTES bytes and an array of
// BLOCK_MAX_LEN doubles.
// NOTE(review): judging by the names, z4buf presumably receives the LZ4-decoded bytes and
// exceptions the doubles that were stored raw - confirm against the Decompress definition.
90  struct Aux {
91  uint8_t z4buf[DoubleCompressor::BLOCK_MAX_BYTES];
92  double exceptions[BLOCK_MAX_LEN];
93  };
94 
// Aux is owned through a unique_ptr instead of being embedded by value.
// NOTE(review): where it gets allocated is not visible in this header.
95  std::unique_ptr<Aux> aux_;
96 };
97 
98 } // namespace util