block_compressor.h
1 // Copyright 2017, Beeri 15. All rights reserved.
2 // Author: Roman Gershman (romange@gmail.com)
3 //
4 #pragma once
5 
6 #include <memory>
7 #include <vector>
8 #include "strings/range.h"
9 
10 /* Zstd based wrapper that has the following properties:
11  1. It compresses input data into a zstd frame.
12  2. The frame consists of blocks, each of which decompresses into an output buffer of exactly 128K,
13  unless it is the last block.
14  3. During the decompression a previous block in addition to the current must be kept in RAM
15  to allow back referencing in the decompressor.
16 
17  The flow sequence is: Add*, Finalize. Can be applied multiple times for the same
18  block compressor object.
19 */
20 namespace util {
21 
23  public:
24  enum { BLOCK_SIZE_LOG = 17, BLOCK_SIZE = 1 << BLOCK_SIZE_LOG };  // BLOCK_SIZE = 2^17 = 131072 bytes (128K).
25 
27 
28  ~BlockCompressor();  // Defined out of line. NOTE(review): presumably releases zstd_cntx_ - confirm.
29 
30  void Add(uint8_t b) {
31  if (compress_block_size_ == 0) {
32  Start();
33  }
34  buf_start()[pos_++] = b;
35  if (pos_ == BLOCK_SIZE) {
36  Compress();
37  }
38  }
39 
40  void Add(strings::ByteRange br);  // Range overload, defined out of line. Presumably appends every byte of br - confirm.
41 
42  // Flushes and compresses all pending data, then finalizes the frame.
43  // If no data was added, compressed_blocks() will return an empty vector.
44  void Finalize();
45 
46  // Flushes the pending data without finalizing the frame. Allows compression of blocks
47  // smaller than BLOCK_SIZE.
48  // Finalize should still be called to write a valid zstd frame.
49  void Compress() { CompressInternal(false); }
50 
51  const std::vector<strings::ByteRange>& compressed_blocks() const {
52  return compressed_blocks_;
53  }
54 
55  size_t compressed_size() const { return compressed_size_; }
56  size_t pending_size() const { return pos_; }
57 
58  // Zero-copy API - saves a redundant memory copy.
59  // The flow is to obtain a destination buffer to write into directly by calling BlockBuffer().
60  // The user should then write into this buffer starting from its beginning.
61  // Once the data is written, the user should call Commit(size) with the number of bytes
62  // actually written there.
63  strings::MutableByteRange BlockBuffer();
64 
65  // Commits the write of sz bytes into the compressor. Returns true if the current block was
66  // completely filled and compressed; otherwise returns false.
67  bool Commit(size_t sz);
68 
69  // Needed for compressed_size() to return a correct value after the frame has been finalized.
70  void ClearCompressedData();  // NOTE(review): presumably resets compressed_size_ and the compressed blocks - confirm.
71 
72  private:
73  void Start();  // Called by Add(uint8_t) while compress_block_size_ == 0; defined out of line.
74  void CompressInternal(bool finalize_frame);  // Compress() calls this with finalize_frame == false.
75 
76  const uint8_t* buf_start() const {
77  return double_buf_.get() + (BLOCK_SIZE + 1) * cur_buf_index_;
78  }
79 
80  uint8_t* buf_start() {
81  return double_buf_.get() + (BLOCK_SIZE + 1) * cur_buf_index_;
82  }
83 
84  void* zstd_cntx_ = nullptr;  // Opaque pointer. NOTE(review): presumably the zstd compression context - confirm.
85  size_t compress_block_size_ = 0;  // While zero, Add(uint8_t) calls Start() before writing.
86  size_t pos_ = 0;  // Write offset into the current buffer; reported by pending_size().
87 
88  std::vector<std::unique_ptr<uint8_t[]>> compressed_bufs_;  // NOTE(review): presumably owns the memory compressed_blocks_ refers to - confirm.
89  std::vector<strings::ByteRange> compressed_blocks_;  // Exposed through compressed_blocks().
90  size_t compressed_size_ = 0;  // Exposed through compressed_size().
91  std::unique_ptr<uint8_t[]> double_buf_;  // Addressed by buf_start() in strides of BLOCK_SIZE + 1 bytes.
92  unsigned cur_buf_index_ = 0; // 0 or 1; selects the buffer inside double_buf_ returned by buf_start().
93 };
94 
96  public:
97  enum { BLOCK_SIZE = BlockCompressor::BLOCK_SIZE};  // Same value as the compressor's BLOCK_SIZE.
98 
100 
102 
103  // Returns 0 if decompression of the frame has ended, 1 if it is still in progress.
104  // In any case "*consumed" will hold how many bytes were consumed from br.
105  // If a negative number is returned, the last portion of br is too small to decompress.
106  // In that case, -(return value) tells how many input bytes are needed.
107  int Decompress(strings::ByteRange br, uint32_t* consumed);
108 
109  // Can be called after a successful Decompress call.
110  strings::ByteRange GetDecompressedBlock() const;
111 
112  private:
113  void* zstd_dcntx_ = nullptr;  // Opaque pointer. NOTE(review): presumably the zstd decompression context - confirm.
114  unsigned frame_state_ = 2; // bit 1 for init state; bit 0 - which block to write to.
115  std::unique_ptr<uint8_t[]> buf_;  // NOTE(review): presumably holds the two output blocks selected by bit 0 of frame_state_ - confirm.
116  size_t decompress_size_ = 0;  // NOTE(review): presumably the size of the last decompressed block - confirm.
117 };
118 
119 
120 
121 } // namespace util