26namespace compact_type {
27 inline constexpr uint8_t
STOP = 0;
30 inline constexpr uint8_t
I8 = 3;
31 inline constexpr uint8_t
I16 = 4;
32 inline constexpr uint8_t
I32 = 5;
33 inline constexpr uint8_t
I64 = 6;
36 inline constexpr uint8_t
LIST = 9;
37 inline constexpr uint8_t
SET = 10;
38 inline constexpr uint8_t
MAP = 11;
39 inline constexpr uint8_t
STRUCT = 12;
86 int16_t delta = field_id - last_field_ids_.top();
87 if (delta > 0 && delta <= 15) {
88 buf_.push_back(
static_cast<uint8_t
>((delta << 4) | thrift_type));
90 buf_.push_back(thrift_type);
91 write_zigzag_i16(field_id);
93 last_field_ids_.top() = field_id;
109 buf_.push_back(val ? 0x01 : 0x00);
114 buf_.push_back(
static_cast<uint8_t
>(val));
123 int16_t delta = field_id - last_field_ids_.top();
124 if (delta > 0 && delta <= 15) {
125 buf_.push_back(
static_cast<uint8_t
>((delta << 4) | thrift_type));
127 buf_.push_back(thrift_type);
128 write_zigzag_i16(field_id);
130 last_field_ids_.top() = field_id;
135 write_varint32(zigzag_encode_i32(val));
140 write_varint64(zigzag_encode_i64(val));
146 std::memcpy(&bits, &val, 8);
147 for (
int i = 0; i < 8; ++i)
148 buf_.push_back(
static_cast<uint8_t
>((bits >> (i * 8)) & 0xFF));
156 std::memcpy(&bits, &val, 4);
157 for (
int i = 0; i < 4; ++i)
158 buf_.push_back(
static_cast<uint8_t
>((bits >> (i * 8)) & 0xFF));
165 throw std::overflow_error(
"CompactEncoder::write_string: length "
166 + std::to_string(val.size()) +
" exceeds MAX_STRING_BYTES");
168 write_varint32(
static_cast<uint32_t
>(val.size()));
169 buf_.insert(buf_.end(), val.begin(), val.end());
176 throw std::overflow_error(
"CompactEncoder::write_binary: length "
177 + std::to_string(len) +
" exceeds MAX_STRING_BYTES");
179 write_varint32(
static_cast<uint32_t
>(len));
180 buf_.insert(buf_.end(),
data,
data + len);
187 throw std::invalid_argument(
"write_list_header: negative list size");
190 buf_.push_back(
static_cast<uint8_t
>((
size << 4) | elem_type));
192 buf_.push_back(
static_cast<uint8_t
>(0xF0 | elem_type));
193 write_varint32(
static_cast<uint32_t
>(
size));
/// Returns a const reference to buf_, the byte vector the write_* methods append to.
/// No copy is made; the function only hands out a reference to the member.
200 [[nodiscard]]
const std::vector<uint8_t>&
data()
const {
return buf_; }
/// Returns the number of bytes currently held in buf_ (the encoded output so far).
203 [[nodiscard]]
size_t size()
const {
return buf_.size(); }
209 while (!last_field_ids_.empty()) last_field_ids_.pop();
210 last_field_ids_.push(0);
214 std::vector<uint8_t> buf_;
215 std::stack<int16_t> last_field_ids_;
219 void write_varint32(uint32_t val) {
221 buf_.push_back(
static_cast<uint8_t
>((val & 0x7F) | 0x80));
224 buf_.push_back(
static_cast<uint8_t
>(val));
227 void write_varint64(uint64_t val) {
229 buf_.push_back(
static_cast<uint8_t
>((val & 0x7F) | 0x80));
232 buf_.push_back(
static_cast<uint8_t
>(val));
235 void write_zigzag_i16(int16_t val) {
236 uint32_t zz = zigzag_encode_i32(
static_cast<int32_t
>(val));
243 static uint32_t zigzag_encode_i32(int32_t val) {
244 return (
static_cast<uint32_t
>(val) << 1) ^
static_cast<uint32_t
>(val >> 31);
248 static uint64_t zigzag_encode_i64(int64_t val) {
249 return (
static_cast<uint64_t
>(val) << 1) ^
static_cast<uint64_t
>(val >> 63);
275 : data_(data), size_(size), pos_(0), error_(false),
276 pending_bool_{}, pending_bool_valid_(false) {
277 last_field_ids_.push(0);
286 if (!ensure(1))
return {0, 0};
288 uint8_t
byte = data_[pos_++];
291 if (
byte == 0x00)
return {0, 0};
293 uint8_t type =
byte & 0x0F;
294 int16_t delta =
static_cast<int16_t
>((
byte >> 4) & 0x0F);
299 field_id = last_field_ids_.top() + delta;
302 int32_t id32 = read_zigzag_i32();
303 field_id =
static_cast<int16_t
>(id32);
305 last_field_ids_.top() = field_id;
310 pending_bool_ =
true;
311 pending_bool_valid_ =
true;
313 pending_bool_ =
false;
314 pending_bool_valid_ =
true;
317 if (++field_count_ > MAX_FIELD_COUNT) {
322 if (++total_fields_read_ > MAX_TOTAL_FIELDS) {
327 return {field_id, type};
333 if (pending_bool_valid_) {
334 pending_bool_valid_ =
false;
335 return pending_bool_;
337 if (!ensure(1))
return false;
338 return data_[pos_++] != 0;
343 if (!ensure(1))
return 0;
344 return static_cast<int8_t
>(data_[pos_++]);
349 return read_zigzag_i32();
354 return read_zigzag_i64();
359 if (!ensure(8))
return 0.0;
361 for (
int i = 0; i < 8; ++i)
362 bits |=
static_cast<uint64_t
>(data_[pos_++]) << (i * 8);
364 std::memcpy(&val, &bits, 8);
370 if (!ensure(4))
return 0.0f;
372 for (
int i = 0; i < 4; ++i)
373 bits |=
static_cast<uint32_t
>(data_[pos_++]) << (i * 8);
375 std::memcpy(&val, &bits, 4);
381 uint32_t len = read_varint32();
382 if (len > MAX_STRING_BYTES) { error_ =
true;
return {}; }
383 if (!ensure(len))
return {};
384 std::string result(
reinterpret_cast<const char*
>(data_ + pos_), len);
391 uint32_t len = read_varint32();
392 if (len > MAX_STRING_BYTES) { error_ =
true;
return {}; }
393 if (!ensure(len))
return {};
394 std::vector<uint8_t> result(data_ + pos_, data_ + pos_ + len);
401 if (!ensure(1))
return {0, 0};
402 uint8_t
byte = data_[pos_++];
403 uint8_t elem_type =
byte & 0x0F;
404 int32_t size = (
byte >> 4) & 0x0F;
407 uint32_t raw = read_varint32();
408 if (raw >
static_cast<uint32_t
>((std::numeric_limits<int32_t>::max)())) {
412 size =
static_cast<int32_t
>(raw);
419 if (
static_cast<uint32_t
>(size) > MAX_COLLECTION_SIZE) {
420 error_ =
true;
return {0, 0};
422 return {elem_type, size};
428 switch (thrift_type) {
436 if (ensure(1)) pos_ += 1;
442 (void)read_varint32();
447 (void)read_varint64();
451 if (ensure(8)) pos_ += 8;
456 uint32_t len = read_varint32();
457 if (ensure(len)) pos_ += len;
464 if (hdr.size < 0 ||
static_cast<uint32_t
>(hdr.size) > MAX_COLLECTION_SIZE) {
465 error_ =
true;
break;
467 for (int32_t i = 0; i < hdr.size &&
good(); ++i) {
474 uint32_t map_size = read_varint32();
475 if (map_size == 0)
break;
476 if (map_size > MAX_COLLECTION_SIZE) { error_ =
true;
break; }
477 if (!ensure(1))
break;
478 uint8_t kv_types = data_[pos_++];
479 uint8_t key_type = (kv_types >> 4) & 0x0F;
480 uint8_t val_type = kv_types & 0x0F;
481 for (uint32_t i = 0; i < map_size &&
good(); ++i) {
493 if (fh.is_stop())
break;
509 if (last_field_ids_.size() >= MAX_NESTING_DEPTH) { error_ =
true;
return; }
510 last_field_ids_.push(0);
516 if (last_field_ids_.empty()) { error_ =
true;
return; }
517 last_field_ids_.pop();
524 return (pos_ <= size_) ? (size_ - pos_) : 0;
/// Returns pos_, the index into data_ of the next byte to be read.
528 [[nodiscard]]
size_t position()
const {
return pos_; }
/// Returns true while error_ is unset. The read paths set error_ when a
/// length, collection size, or nesting depth exceeds its configured limit.
531 [[nodiscard]]
bool good()
const {
return !error_; }
534 const uint8_t* data_;
541 bool pending_bool_valid_;
543 static constexpr size_t MAX_NESTING_DEPTH = 64;
544 static constexpr size_t MAX_FIELD_COUNT = 65536;
545 static constexpr size_t MAX_TOTAL_FIELDS = 1'000'000;
546 static constexpr uint32_t MAX_STRING_BYTES = 64u * 1024u * 1024u;
547 static constexpr uint32_t MAX_COLLECTION_SIZE = 1'000'000u;
548 std::stack<int16_t> last_field_ids_;
549 size_t field_count_ = 0;
550 size_t total_fields_read_ = 0;
555 [[nodiscard]]
bool ensure(
size_t n) {
556 if (error_ || n > size_ || pos_ > size_ - n) {
565 [[nodiscard]] uint32_t read_varint32() {
569 if (!ensure(1))
return 0;
570 uint8_t
byte = data_[pos_++];
571 result |=
static_cast<uint32_t
>(
byte & 0x7F) << shift;
572 if ((
byte & 0x80) == 0)
return result;
580 [[nodiscard]] uint64_t read_varint64() {
584 if (!ensure(1))
return 0;
585 uint8_t
byte = data_[pos_++];
586 result |=
static_cast<uint64_t
>(
byte & 0x7F) << shift;
587 if ((
byte & 0x80) == 0)
return result;
597 [[nodiscard]] int32_t read_zigzag_i32() {
598 uint32_t raw = read_varint32();
599 return static_cast<int32_t
>((raw >> 1) ^ -(
static_cast<int32_t
>(raw & 1)));
602 [[nodiscard]] int64_t read_zigzag_i64() {
603 uint64_t raw = read_varint64();
604 return static_cast<int64_t
>((raw >> 1) ^ -(
static_cast<int64_t
>(raw & 1)));
Thrift Compact Protocol reader.
void begin_struct()
Push a new field-ID context for reading a nested struct.
void end_struct()
Pop the field-ID context after finishing a nested struct.
double read_double()
Read a double (8 bytes little-endian, IEEE 754).
FieldHeader read_field_header()
Read a field header.
int64_t read_i64()
Read a 64-bit integer (zigzag + varint64 decode).
CompactDecoder(const uint8_t *data, size_t size)
Construct a decoder over a byte buffer.
int8_t read_i8()
Read an 8-bit signed integer (single raw byte, I8 wire type).
ListHeader read_list_header()
Read a list header. Returns element type and count.
void skip_field(uint8_t thrift_type)
Skip a field without parsing its value.
std::string read_string()
Read a string (varint-length-prefixed UTF-8 bytes).
float read_float()
Read a float (4 bytes little-endian, IEEE 754).
std::vector< uint8_t > read_binary()
Read raw binary data (varint-length-prefixed bytes).
size_t position() const
Returns the current read position (offset from start of buffer).
bool good() const
Returns true if no errors have occurred (e.g. no bounds, size-limit, or nesting-depth violations).
bool read_bool()
Read a boolean value.
size_t remaining() const
Returns the number of bytes remaining in the buffer.
int32_t read_i32()
Read a 32-bit integer (zigzag + varint decode).
Thrift Compact Protocol writer.
void begin_struct()
Push a new field-ID context for a nested struct.
size_t size() const
Returns the current size of the encoded buffer in bytes.
void write_bool(bool val)
Write a standalone bool (not embedded in a field header).
void end_struct()
Pop the field-ID context after finishing a nested struct.
void write_string(const std::string &val)
Write a string as varint-length-prefixed UTF-8 bytes.
void write_field_bool(int16_t field_id, bool val)
Write a bool field where the value is embedded in the field header's type nibble (1 = true, 2 = false).
void write_float(float val)
Write a float as 4 bytes little-endian (IEEE 754).
const std::vector< uint8_t > & data() const
Returns a const reference to the underlying byte buffer.
void clear()
Resets the encoder to its initial state (empty buffer, field ID stack reset to a single zero entry).
CompactEncoder()
Default constructor. Initializes field-ID stack with a single zero entry.
void write_field(int16_t field_id, uint8_t thrift_type)
Write a field header.
void write_double(double val)
Write a double as 8 bytes little-endian (IEEE 754).
void write_i32(int32_t val)
Write a 32-bit integer as zigzag + varint.
void write_stop()
Write struct stop marker (0x00).
void write_i64(int64_t val)
Write a 64-bit integer as zigzag + varint.
void write_i8(int8_t val)
Write an 8-bit signed integer as a single raw byte (I8 wire type).
void write_binary(const uint8_t *data, size_t len)
Write raw binary data as varint-length-prefixed bytes.
static constexpr size_t MAX_STRING_BYTES
Maximum string/binary field size (matches CompactDecoder::MAX_STRING_BYTES).
void write_list_header(uint8_t elem_type, int32_t size)
Write a list header.
constexpr uint8_t STRUCT
Nested struct.
constexpr uint8_t I32
32-bit signed integer (zigzag + varint).
constexpr uint8_t DOUBLE
IEEE 754 double (8 bytes LE).
constexpr uint8_t BOOL_FALSE
Boolean false (embedded in field header).
constexpr uint8_t SET
Set container.
constexpr uint8_t BINARY
Length-prefixed bytes (also used for STRING).
constexpr uint8_t LIST
List container.
constexpr uint8_t STOP
Struct stop marker.
constexpr uint8_t I16
16-bit signed integer (zigzag + varint).
constexpr uint8_t BOOL_TRUE
Boolean true (embedded in field header).
constexpr uint8_t MAP
Map container.
constexpr uint8_t I64
64-bit signed integer (zigzag + varint).
constexpr uint8_t I8
8-bit signed integer.