Signet Forge 0.1.0
C++20 Parquet library with AI-native extensions
DEMO
Loading...
Searching...
No Matches
types.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright 2026 Johnson Ogundeji
3#pragma once
4
24
25#include "signet/error.hpp"
26#include "signet/types.hpp"
28
29#include <cstdint>
30#include <optional>
31#include <string>
32#include <vector>
33
34namespace signet::forge::thrift {
35
36// ============================================================================
37// § 1 LogicalType Thrift union family (parquet-format 2.9.0, Sub-phase A)
38//
39// Implements the 5 financial/AI-relevant LogicalType union members:
40// STRING (field 1), DECIMAL (field 5), TIMESTAMP (field 9),
41// INT (field 11), UUID (field 15).
42// ============================================================================
43
// TimeUnit: Thrift union selecting a time resolution (millis / micros / nanos).
// The Kind enumerator values (1, 2, 3) are used directly as the union field
// IDs: serialize() casts `kind` to the field ID, and the decoder casts the
// field ID back to Kind. A default-constructed TimeUnit is MICROS.
48struct TimeUnit {
49 enum class Kind : int32_t { MILLIS = 1, MICROS = 2, NANOS = 3 } kind = Kind::MICROS;
50
51 TimeUnit() = default;
52 explicit TimeUnit(Kind k) : kind(k) {}
53
// Encodes this union as an outer struct holding exactly one field: an empty
// nested struct written at the field ID equal to `kind`.
54 void serialize(CompactEncoder& enc) const {
55 enc.begin_struct();
56 // Write the active unit variant as an empty struct at its union field ID.
57 enc.write_field(static_cast<int16_t>(kind), compact_type::STRUCT);
58 enc.begin_struct();
59 enc.write_stop();
60 enc.end_struct();
61 enc.write_stop();
62 enc.end_struct();
63 }
64
// NOTE(review): listing line 65 is missing from this extract. It presumably
// holds the signature of the decoding member that the body below belongs to
// (it uses a decoder named `dec` and returns either an empty success value or
// an {ErrorCode, message} pair) -- restore it from the original header.
66 dec.begin_struct();
67 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "TimeUnit: begin_struct failed"};
// Read field headers until STOP. The STOP test runs before the dec.good()
// test on each iteration; decoder health is checked again after end_struct.
68 for (;;) {
69 auto [fid, ftype] = dec.read_field_header();
70 if (ftype == compact_type::STOP) break;
71 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "TimeUnit: field header error"};
72 switch (fid) {
// Field IDs 1-3 share one handler: the ID itself becomes the Kind. If
// more than one variant field is present, the last one read wins.
73 case 1: // MilliSeconds
74 case 2: // MicroSeconds
75 case 3: // NanoSeconds
76 if (ftype != compact_type::STRUCT) {
77 return {ErrorCode::THRIFT_DECODE_ERROR, "TimeUnit: expected STRUCT for unit variant"};
78 }
79 kind = static_cast<Kind>(fid);
80 dec.skip_field(compact_type::STRUCT); // consume empty unit struct
81 break;
// Any other field ID is skipped rather than rejected.
82 default:
83 dec.skip_field(ftype);
84 break;
85 }
86 }
87 dec.end_struct();
88 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "TimeUnit: decoder error"};
// Success. If no variant field was seen, `kind` keeps whatever value it had
// before decoding.
89 return {};
90 }
91};
92
// IntType: payload of the INT logical type -- an integer bit width plus a
// signedness flag. Defaults to a signed 64-bit integer.
96struct IntType {
97 int8_t bit_width = 64;
98 bool is_signed = true;
99
100 IntType() = default;
101 IntType(int8_t bw, bool s) : bit_width(bw), is_signed(s) {}
102
// Encodes the struct. NOTE(review): listing lines 105 and 107 are missing
// from this extract; they presumably emit the field header for bit_width and
// the whole is_signed field (the decoder below reads field 1 as I8 and
// field 2 as BOOL) -- confirm against the original header.
103 void serialize(CompactEncoder& enc) const {
104 enc.begin_struct();
106 enc.write_i8(bit_width);
108 enc.write_stop();
109 enc.end_struct();
110 }
111
// NOTE(review): listing line 112 is missing from this extract. It presumably
// holds the signature of the decoding member that the body below belongs to
// (decoder named `dec`; returns empty success or an {ErrorCode, message}
// pair) -- restore it from the original header.
113 dec.begin_struct();
114 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "IntType: begin_struct failed"};
// Read field headers until STOP; unknown field IDs are skipped.
115 for (;;) {
116 auto [fid, ftype] = dec.read_field_header();
117 if (ftype == compact_type::STOP) break;
118 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "IntType: field header error"};
119 switch (fid) {
// Field 1: bit width, must arrive as I8. The decoded value is stored
// as-is with no range check in this block.
// NOTE(review): parquet-format appears to allow only 8/16/32/64 here --
// confirm whether a caller validates the width.
120 case 1:
121 if (ftype != compact_type::I8) {
122 return {ErrorCode::THRIFT_DECODE_ERROR, "IntType.bitWidth: expected I8"};
123 }
124 bit_width = dec.read_i8();
125 break;
// Field 2: signedness. Either BOOL_TRUE or BOOL_FALSE is accepted as
// the wire type; the value itself comes from dec.read_bool().
126 case 2:
127 if (ftype != compact_type::BOOL_TRUE && ftype != compact_type::BOOL_FALSE) {
128 return {ErrorCode::THRIFT_DECODE_ERROR, "IntType.isSigned: expected BOOL"};
129 }
130 is_signed = dec.read_bool();
131 break;
132 default:
133 dec.skip_field(ftype);
134 break;
135 }
136 }
137 dec.end_struct();
138 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "IntType: decoder error"};
// Success. Neither field is enforced as required: an absent field leaves
// the corresponding member at its prior value.
139 return {};
140 }
141};
142
147 int32_t scale = 0;
148 int32_t precision = 0;
149
150 DecimalType() = default;
151 DecimalType(int32_t s, int32_t p) : scale(s), precision(p) {}
152
153 void serialize(CompactEncoder& enc) const {
154 enc.begin_struct();
156 enc.write_i32(scale);
158 enc.write_i32(precision);
159 enc.write_stop();
160 enc.end_struct();
161 }
162
164 dec.begin_struct();
165 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "DecimalType: begin_struct failed"};
166 for (;;) {
167 auto [fid, ftype] = dec.read_field_header();
168 if (ftype == compact_type::STOP) break;
169 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "DecimalType: field header error"};
170 switch (fid) {
171 case 1:
172 if (ftype != compact_type::I32) {
173 return {ErrorCode::THRIFT_DECODE_ERROR, "DecimalType.scale: expected I32"};
174 }
175 scale = dec.read_i32();
176 break;
177 case 2:
178 if (ftype != compact_type::I32) {
179 return {ErrorCode::THRIFT_DECODE_ERROR, "DecimalType.precision: expected I32"};
180 }
181 precision = dec.read_i32();
182 break;
183 default:
184 dec.skip_field(ftype);
185 break;
186 }
187 }
188 dec.end_struct();
189 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "DecimalType: decoder error"};
190 return {};
191 }
192};
193
198 bool is_adjusted_to_utc = true;
200
201 TimestampType() = default;
203
204 void serialize(CompactEncoder& enc) const {
205 enc.begin_struct();
208 unit.serialize(enc);
209 enc.write_stop();
210 enc.end_struct();
211 }
212
214 dec.begin_struct();
215 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "TimestampType: begin_struct failed"};
216 for (;;) {
217 auto [fid, ftype] = dec.read_field_header();
218 if (ftype == compact_type::STOP) break;
219 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "TimestampType: field header error"};
220 switch (fid) {
221 case 1:
222 if (ftype != compact_type::BOOL_TRUE && ftype != compact_type::BOOL_FALSE) {
223 return {ErrorCode::THRIFT_DECODE_ERROR, "TimestampType.isAdjustedToUTC: expected BOOL"};
224 }
226 break;
227 case 2:
228 if (ftype != compact_type::STRUCT) {
229 return {ErrorCode::THRIFT_DECODE_ERROR, "TimestampType.unit: expected STRUCT"};
230 }
231 if (auto r = unit.deserialize(dec); !r.has_value()) return r.error();
232 break;
233 default:
234 dec.skip_field(ftype);
235 break;
236 }
237 }
238 dec.end_struct();
239 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "TimestampType: decoder error"};
240 return {};
241 }
242};
243
250 enum class Kind : int32_t {
251 NONE = 0,
252 STRING = 1,
253 DECIMAL = 5,
254 TIMESTAMP = 9,
255 INT = 11,
256 UUID = 15,
258
259 std::optional<DecimalType> decimal;
260 std::optional<TimestampType> timestamp;
261 std::optional<IntType> integer;
262 // STRING and UUID carry no payload; presence is implied by kind.
263
264 LogicalTypeUnion() = default;
265
266 void serialize(CompactEncoder& enc) const {
267 enc.begin_struct();
268 switch (kind) {
269 case Kind::STRING:
270 // StringType: field 1, empty struct body
272 enc.begin_struct(); enc.write_stop(); enc.end_struct();
273 break;
274 case Kind::DECIMAL:
275 if (decimal.has_value()) {
277 decimal->serialize(enc);
278 }
279 break;
280 case Kind::TIMESTAMP:
281 if (timestamp.has_value()) {
283 timestamp->serialize(enc);
284 }
285 break;
286 case Kind::INT:
287 if (integer.has_value()) {
289 integer->serialize(enc);
290 }
291 break;
292 case Kind::UUID:
293 // UUIDType: field 15, empty struct body
295 enc.begin_struct(); enc.write_stop(); enc.end_struct();
296 break;
297 case Kind::NONE:
298 break;
299 }
300 enc.write_stop();
301 enc.end_struct();
302 }
303
305 dec.begin_struct();
306 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "LogicalTypeUnion: begin_struct failed"};
307 for (;;) {
308 auto [fid, ftype] = dec.read_field_header();
309 if (ftype == compact_type::STOP) break;
310 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "LogicalTypeUnion: field header error"};
311 switch (fid) {
312 case 1: // StringType
313 if (ftype != compact_type::STRUCT) {
314 return {ErrorCode::THRIFT_DECODE_ERROR, "LogicalTypeUnion.STRING: expected STRUCT"};
315 }
317 dec.skip_field(compact_type::STRUCT); // consume empty StringType body
318 break;
319 case 5: // DecimalType
320 if (ftype != compact_type::STRUCT) {
321 return {ErrorCode::THRIFT_DECODE_ERROR, "LogicalTypeUnion.DECIMAL: expected STRUCT"};
322 }
324 decimal.emplace();
325 if (auto r = decimal->deserialize(dec); !r.has_value()) return r.error();
326 break;
327 case 9: // TimestampType
328 if (ftype != compact_type::STRUCT) {
329 return {ErrorCode::THRIFT_DECODE_ERROR, "LogicalTypeUnion.TIMESTAMP: expected STRUCT"};
330 }
332 timestamp.emplace();
333 if (auto r = timestamp->deserialize(dec); !r.has_value()) return r.error();
334 break;
335 case 11: // IntType
336 if (ftype != compact_type::STRUCT) {
337 return {ErrorCode::THRIFT_DECODE_ERROR, "LogicalTypeUnion.INT: expected STRUCT"};
338 }
339 kind = Kind::INT;
340 integer.emplace();
341 if (auto r = integer->deserialize(dec); !r.has_value()) return r.error();
342 break;
343 case 15: // UUIDType
344 if (ftype != compact_type::STRUCT) {
345 return {ErrorCode::THRIFT_DECODE_ERROR, "LogicalTypeUnion.UUID: expected STRUCT"};
346 }
348 dec.skip_field(compact_type::STRUCT); // consume empty UUIDType body
349 break;
350 default:
351 dec.skip_field(ftype);
352 break;
353 }
354 }
355 dec.end_struct();
356 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "LogicalTypeUnion: decoder error"};
357 return {};
358 }
359};
360
361// ============================================================================
362// § 2 Core Parquet Thrift structs (existing, updated for parquet-format 2.9.0)
363// ============================================================================
364
370 std::optional<std::string> max;
371 std::optional<std::string> min;
372 std::optional<int64_t> null_count;
373 std::optional<int64_t> distinct_count;
374 std::optional<std::string> max_value;
375 std::optional<std::string> min_value;
376
377 Statistics() = default;
378
379 void serialize(CompactEncoder& enc) const {
380 enc.begin_struct();
381 if (max.has_value()) {
383 enc.write_string(*max);
384 }
385 if (min.has_value()) {
387 enc.write_string(*min);
388 }
389 if (null_count.has_value()) {
391 enc.write_i64(*null_count);
392 }
393 if (distinct_count.has_value()) {
396 }
397 if (max_value.has_value()) {
400 }
401 if (min_value.has_value()) {
404 }
405 enc.write_stop();
406 enc.end_struct();
407 }
408
410 dec.begin_struct();
411 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "Statistics: begin_struct failed"};
412 for (;;) {
413 auto [fid, ftype] = dec.read_field_header();
414 if (ftype == compact_type::STOP) break;
415 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "Statistics: field header error"};
416 switch (fid) {
417 case 1:
418 if (ftype != compact_type::BINARY) {
419 return {ErrorCode::THRIFT_DECODE_ERROR, "Statistics.max: expected BINARY"};
420 }
421 max = dec.read_string();
422 break;
423 case 2:
424 if (ftype != compact_type::BINARY) {
425 return {ErrorCode::THRIFT_DECODE_ERROR, "Statistics.min: expected BINARY"};
426 }
427 min = dec.read_string();
428 break;
429 case 3:
430 if (ftype != compact_type::I64) {
431 return {ErrorCode::THRIFT_DECODE_ERROR, "Statistics.null_count: expected I64"};
432 }
433 null_count = dec.read_i64();
434 break;
435 case 4:
436 if (ftype != compact_type::I64) {
437 return {ErrorCode::THRIFT_DECODE_ERROR, "Statistics.distinct_count: expected I64"};
438 }
439 distinct_count = dec.read_i64();
440 break;
441 case 5:
442 if (ftype != compact_type::BINARY) {
443 return {ErrorCode::THRIFT_DECODE_ERROR, "Statistics.max_value: expected BINARY"};
444 }
445 max_value = dec.read_string();
446 break;
447 case 6:
448 if (ftype != compact_type::BINARY) {
449 return {ErrorCode::THRIFT_DECODE_ERROR, "Statistics.min_value: expected BINARY"};
450 }
451 min_value = dec.read_string();
452 break;
453 default:
454 dec.skip_field(ftype);
455 break;
456 }
457 }
458 dec.end_struct();
459 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "Statistics: decoder error"};
460 return {};
461 }
462};
463
// KeyValue: a string key with an optional string value. On decode, field 1
// (key) is required and field 2 (value) is optional.
468struct KeyValue {
469 std::string key;
470 std::optional<std::string> value;
471
472 KeyValue() = default;
// Two-argument form always produces an engaged `value` (even for an empty
// string); use the default constructor and leave `value` unset for "no value".
// NOTE(review): std::move requires <utility>; it is not among the visible
// standard includes, so it presumably arrives transitively -- confirm.
473 KeyValue(std::string k, std::string v)
474 : key(std::move(k)), value(std::move(v)) {}
475
// Encodes the struct: key is always written; value only when engaged.
// NOTE(review): listing lines 478 and 481 are missing from this extract; they
// presumably emit the field headers for fields 1 and 2 ahead of each
// write_string call -- confirm against the original header.
476 void serialize(CompactEncoder& enc) const {
477 enc.begin_struct();
479 enc.write_string(key);
480 if (value.has_value()) {
482 enc.write_string(*value);
483 }
484 enc.write_stop();
485 enc.end_struct();
486 }
487
// NOTE(review): listing line 488 is missing from this extract. It presumably
// holds the signature of the decoding member that the body below belongs to
// (decoder named `dec`; returns empty success or an {ErrorCode, message}
// pair) -- restore it from the original header.
489 dec.begin_struct();
490 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "KeyValue: begin_struct failed"};
491 uint32_t seen = 0; // Required-field bitmask: bit 0 = field 1 (key)
// Read field headers until STOP; unknown field IDs are skipped.
492 for (;;) {
493 auto [fid, ftype] = dec.read_field_header();
494 if (ftype == compact_type::STOP) break;
495 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "KeyValue: field header error"};
496 switch (fid) {
// Field 1: key (BINARY). Marks bit 0 of `seen` so the required-field
// check after the loop can pass.
497 case 1:
498 if (ftype != compact_type::BINARY) {
499 return {ErrorCode::THRIFT_DECODE_ERROR, "KeyValue.key: expected BINARY"};
500 }
501 key = dec.read_string();
502 seen |= (1u << 0);
503 break;
// Field 2: value (BINARY). Optional, so it does not touch `seen`.
504 case 2:
505 if (ftype != compact_type::BINARY) {
506 return {ErrorCode::THRIFT_DECODE_ERROR, "KeyValue.value: expected BINARY"};
507 }
508 value = dec.read_string();
509 break;
510 default:
511 dec.skip_field(ftype);
512 break;
513 }
514 }
515 dec.end_struct();
// Decoder health is reported before the required-field check, so a decoder
// failure takes precedence over the "missing key" error.
516 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "KeyValue: decoder error"};
517 // Required-field validation
518 if ((seen & 0x01u) == 0u) {
519 return {ErrorCode::THRIFT_DECODE_ERROR, "KeyValue: missing required field 1 (key)"};
520 }
521 return {};
522 }
523};
524
531 std::optional<PhysicalType> type;
532 std::optional<int32_t> type_length;
533 std::optional<Repetition> repetition_type;
534 std::string name;
535 std::optional<int32_t> num_children;
536 std::optional<ConvertedType> converted_type;
537 std::optional<int32_t> scale;
538 std::optional<int32_t> precision;
539 std::optional<int32_t> field_id;
540 std::optional<LogicalTypeUnion> logical_type;
541
542 SchemaElement() = default;
543
544 void serialize(CompactEncoder& enc) const {
545 enc.begin_struct();
546 if (type.has_value()) {
548 enc.write_i32(static_cast<int32_t>(*type));
549 }
550 if (type_length.has_value()) {
553 }
554 if (repetition_type.has_value()) {
556 enc.write_i32(static_cast<int32_t>(*repetition_type));
557 }
558 // field 4: name — always written (required)
560 enc.write_string(name);
561 if (num_children.has_value()) {
564 }
565 if (converted_type.has_value()) {
567 enc.write_i32(static_cast<int32_t>(*converted_type));
568 }
569 if (scale.has_value()) {
571 enc.write_i32(*scale);
572 }
573 if (precision.has_value()) {
575 enc.write_i32(*precision);
576 }
577 if (field_id.has_value()) {
579 enc.write_i32(*field_id);
580 }
581 if (logical_type.has_value()) {
583 logical_type->serialize(enc);
584 }
585 enc.write_stop();
586 enc.end_struct();
587 }
588
590 dec.begin_struct();
591 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "SchemaElement: begin_struct failed"};
592 for (;;) {
593 auto [fid, ftype] = dec.read_field_header();
594 if (ftype == compact_type::STOP) break;
595 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "SchemaElement: field header error"};
596 switch (fid) {
597 case 1:
598 if (ftype != compact_type::I32) {
599 return {ErrorCode::THRIFT_DECODE_ERROR, "SchemaElement.type: expected I32"};
600 }
601 type = static_cast<PhysicalType>(dec.read_i32());
602 break;
603 case 2:
604 if (ftype != compact_type::I32) {
605 return {ErrorCode::THRIFT_DECODE_ERROR, "SchemaElement.type_length: expected I32"};
606 }
607 type_length = dec.read_i32();
608 break;
609 case 3:
610 if (ftype != compact_type::I32) {
611 return {ErrorCode::THRIFT_DECODE_ERROR, "SchemaElement.repetition_type: expected I32"};
612 }
613 repetition_type = static_cast<Repetition>(dec.read_i32());
614 break;
615 case 4:
616 if (ftype != compact_type::BINARY) {
617 return {ErrorCode::THRIFT_DECODE_ERROR, "SchemaElement.name: expected BINARY"};
618 }
619 name = dec.read_string();
620 break;
621 case 5:
622 if (ftype != compact_type::I32) {
623 return {ErrorCode::THRIFT_DECODE_ERROR, "SchemaElement.num_children: expected I32"};
624 }
625 num_children = dec.read_i32();
626 break;
627 case 6:
628 if (ftype != compact_type::I32) {
629 return {ErrorCode::THRIFT_DECODE_ERROR, "SchemaElement.converted_type: expected I32"};
630 }
631 converted_type = static_cast<ConvertedType>(dec.read_i32());
632 break;
633 case 7:
634 if (ftype != compact_type::I32) {
635 return {ErrorCode::THRIFT_DECODE_ERROR, "SchemaElement.scale: expected I32"};
636 }
637 scale = dec.read_i32();
638 break;
639 case 8:
640 if (ftype != compact_type::I32) {
641 return {ErrorCode::THRIFT_DECODE_ERROR, "SchemaElement.precision: expected I32"};
642 }
643 precision = dec.read_i32();
644 break;
645 case 9:
646 if (ftype != compact_type::I32) {
647 return {ErrorCode::THRIFT_DECODE_ERROR, "SchemaElement.field_id: expected I32"};
648 }
649 field_id = dec.read_i32();
650 break;
651 case 10: {
652 if (ftype != compact_type::STRUCT) {
653 return {ErrorCode::THRIFT_DECODE_ERROR, "SchemaElement.logicalType: expected STRUCT"};
654 }
655 logical_type.emplace();
656 if (auto r = logical_type->deserialize(dec); !r.has_value()) return r.error();
657 break;
658 }
659 default:
660 dec.skip_field(ftype);
661 break;
662 }
663 }
664 dec.end_struct();
665 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "SchemaElement: decoder error"};
666 return {};
667 }
668};
669
675 int32_t num_values = 0;
679 std::optional<Statistics> statistics;
680
681 DataPageHeader() = default;
682
683 void serialize(CompactEncoder& enc) const {
684 enc.begin_struct();
688 enc.write_i32(static_cast<int32_t>(encoding));
690 enc.write_i32(static_cast<int32_t>(definition_level_encoding));
692 enc.write_i32(static_cast<int32_t>(repetition_level_encoding));
693 if (statistics.has_value()) {
695 statistics->serialize(enc);
696 }
697 enc.write_stop();
698 enc.end_struct();
699 }
700
702 dec.begin_struct();
703 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "DataPageHeader: begin_struct failed"};
704 uint32_t seen = 0; // Required-field bitmask: bits 0-3 = fields 1-4
705 for (;;) {
706 auto [fid, ftype] = dec.read_field_header();
707 if (ftype == compact_type::STOP) break;
708 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "DataPageHeader: field header error"};
709 switch (fid) {
710 case 1:
711 if (ftype != compact_type::I32) {
712 return {ErrorCode::THRIFT_DECODE_ERROR, "DataPageHeader.num_values: expected I32"};
713 }
714 num_values = dec.read_i32();
715 seen |= (1u << 0);
716 break;
717 case 2:
718 if (ftype != compact_type::I32) {
719 return {ErrorCode::THRIFT_DECODE_ERROR, "DataPageHeader.encoding: expected I32"};
720 }
721 encoding = static_cast<Encoding>(dec.read_i32());
722 seen |= (1u << 1);
723 break;
724 case 3:
725 if (ftype != compact_type::I32) {
727 "DataPageHeader.definition_level_encoding: expected I32"};
728 }
729 definition_level_encoding = static_cast<Encoding>(dec.read_i32());
730 seen |= (1u << 2);
731 break;
732 case 4:
733 if (ftype != compact_type::I32) {
735 "DataPageHeader.repetition_level_encoding: expected I32"};
736 }
737 repetition_level_encoding = static_cast<Encoding>(dec.read_i32());
738 seen |= (1u << 3);
739 break;
740 case 5: {
741 if (ftype != compact_type::STRUCT) {
742 return {ErrorCode::THRIFT_DECODE_ERROR, "DataPageHeader.statistics: expected STRUCT"};
743 }
744 statistics.emplace();
745 if (auto r = statistics->deserialize(dec); !r.has_value()) return r.error();
746 break;
747 }
748 default:
749 dec.skip_field(ftype);
750 break;
751 }
752 }
753 dec.end_struct();
754 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "DataPageHeader: decoder error"};
755 // Required-field validation (fields 1-4, mask = 0x0F)
756 if ((seen & 0x0Fu) != 0x0Fu) {
757 return {ErrorCode::THRIFT_DECODE_ERROR, "DataPageHeader: missing one or more required fields (1-4)"};
758 }
759 return {};
760 }
761};
762
767 int32_t num_values = 0;
769 std::optional<bool> is_sorted;
770
772
773 void serialize(CompactEncoder& enc) const {
774 enc.begin_struct();
778 enc.write_i32(static_cast<int32_t>(encoding));
779 if (is_sorted.has_value()) {
781 }
782 enc.write_stop();
783 enc.end_struct();
784 }
785
787 dec.begin_struct();
788 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "DictionaryPageHeader: begin_struct failed"};
789 for (;;) {
790 auto [fid, ftype] = dec.read_field_header();
791 if (ftype == compact_type::STOP) break;
792 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "DictionaryPageHeader: field header error"};
793 switch (fid) {
794 case 1:
795 if (ftype != compact_type::I32) {
796 return {ErrorCode::THRIFT_DECODE_ERROR, "DictionaryPageHeader.num_values: expected I32"};
797 }
798 num_values = dec.read_i32();
799 break;
800 case 2:
801 if (ftype != compact_type::I32) {
802 return {ErrorCode::THRIFT_DECODE_ERROR, "DictionaryPageHeader.encoding: expected I32"};
803 }
804 encoding = static_cast<Encoding>(dec.read_i32());
805 break;
806 case 3:
807 if (ftype != compact_type::BOOL_TRUE && ftype != compact_type::BOOL_FALSE) {
809 "DictionaryPageHeader.is_sorted: expected BOOL"};
810 }
811 is_sorted = dec.read_bool();
812 break;
813 default:
814 dec.skip_field(ftype);
815 break;
816 }
817 }
818 dec.end_struct();
819 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "DictionaryPageHeader: decoder error"};
820 return {};
821 }
822};
823
829 int32_t num_values = 0;
830 int32_t num_nulls = 0;
831 int32_t num_rows = 0;
835 std::optional<bool> is_compressed;
836
837 DataPageHeaderV2() = default;
838
839 void serialize(CompactEncoder& enc) const {
840 enc.begin_struct();
844 enc.write_i32(num_nulls);
846 enc.write_i32(num_rows);
848 enc.write_i32(static_cast<int32_t>(encoding));
853 if (is_compressed.has_value()) {
855 }
856 enc.write_stop();
857 enc.end_struct();
858 }
859
861 dec.begin_struct();
862 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "DataPageHeaderV2: begin_struct failed"};
863 for (;;) {
864 auto [fid, ftype] = dec.read_field_header();
865 if (ftype == compact_type::STOP) break;
866 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "DataPageHeaderV2: field header error"};
867 switch (fid) {
868 case 1:
869 if (ftype != compact_type::I32) {
870 return {ErrorCode::THRIFT_DECODE_ERROR, "DataPageHeaderV2.num_values: expected I32"};
871 }
872 num_values = dec.read_i32();
873 break;
874 case 2:
875 if (ftype != compact_type::I32) {
876 return {ErrorCode::THRIFT_DECODE_ERROR, "DataPageHeaderV2.num_nulls: expected I32"};
877 }
878 num_nulls = dec.read_i32();
879 break;
880 case 3:
881 if (ftype != compact_type::I32) {
882 return {ErrorCode::THRIFT_DECODE_ERROR, "DataPageHeaderV2.num_rows: expected I32"};
883 }
884 num_rows = dec.read_i32();
885 break;
886 case 4:
887 if (ftype != compact_type::I32) {
888 return {ErrorCode::THRIFT_DECODE_ERROR, "DataPageHeaderV2.encoding: expected I32"};
889 }
890 encoding = static_cast<Encoding>(dec.read_i32());
891 break;
892 case 5:
893 if (ftype != compact_type::I32) {
895 "DataPageHeaderV2.definition_levels_byte_length: expected I32"};
896 }
898 break;
899 case 6:
900 if (ftype != compact_type::I32) {
902 "DataPageHeaderV2.repetition_levels_byte_length: expected I32"};
903 }
905 break;
906 case 7:
907 if (ftype != compact_type::BOOL_TRUE && ftype != compact_type::BOOL_FALSE) {
908 return {ErrorCode::THRIFT_DECODE_ERROR, "DataPageHeaderV2.is_compressed: expected BOOL"};
909 }
910 is_compressed = dec.read_bool();
911 break;
912 default:
913 dec.skip_field(ftype);
914 break;
915 }
916 }
917 dec.end_struct();
918 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "DataPageHeaderV2: decoder error"};
919 return {};
920 }
921
923 [[nodiscard]] bool effective_is_compressed() const {
924 return is_compressed.value_or(true);
925 }
926};
927
937 std::optional<int32_t> crc;
938 std::optional<DataPageHeader> data_page_header;
939 // field 6: index_page_header — skipped (not used in Signet)
940 std::optional<DictionaryPageHeader> dictionary_page_header;
941 std::optional<DataPageHeaderV2> data_page_header_v2;
942
943 PageHeader() = default;
944
945 void serialize(CompactEncoder& enc) const {
946 enc.begin_struct();
948 enc.write_i32(static_cast<int32_t>(type));
953 if (crc.has_value()) {
955 enc.write_i32(*crc);
956 }
957 if (data_page_header.has_value()) {
959 data_page_header->serialize(enc);
960 }
961 // field 6: index_page_header — skipped
962 if (dictionary_page_header.has_value()) {
964 dictionary_page_header->serialize(enc);
965 }
966 if (data_page_header_v2.has_value()) {
968 data_page_header_v2->serialize(enc);
969 }
970 enc.write_stop();
971 enc.end_struct();
972 }
973
975 dec.begin_struct();
976 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "PageHeader: begin_struct failed"};
977 for (;;) {
978 auto [fid, ftype] = dec.read_field_header();
979 if (ftype == compact_type::STOP) break;
980 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "PageHeader: field header error"};
981 switch (fid) {
982 case 1:
983 if (ftype != compact_type::I32) {
984 return {ErrorCode::THRIFT_DECODE_ERROR, "PageHeader.type: expected I32"};
985 }
986 type = static_cast<PageType>(dec.read_i32());
987 break;
988 case 2:
989 if (ftype != compact_type::I32) {
990 return {ErrorCode::THRIFT_DECODE_ERROR, "PageHeader.uncompressed_page_size: expected I32"};
991 }
993 break;
994 case 3:
995 if (ftype != compact_type::I32) {
996 return {ErrorCode::THRIFT_DECODE_ERROR, "PageHeader.compressed_page_size: expected I32"};
997 }
999 break;
1000 case 4:
1001 if (ftype != compact_type::I32) {
1002 return {ErrorCode::THRIFT_DECODE_ERROR, "PageHeader.crc: expected I32"};
1003 }
1004 crc = dec.read_i32();
1005 break;
1006 case 5: {
1007 if (ftype != compact_type::STRUCT) {
1009 "PageHeader.data_page_header: expected STRUCT"};
1010 }
1011 data_page_header.emplace();
1012 if (auto r = data_page_header->deserialize(dec); !r.has_value()) return r.error();
1013 break;
1014 }
1015 case 6:
1016 dec.skip_field(ftype); // index_page_header — skipped
1017 break;
1018 case 7: {
1019 if (ftype != compact_type::STRUCT) {
1021 "PageHeader.dictionary_page_header: expected STRUCT"};
1022 }
1023 dictionary_page_header.emplace();
1024 if (auto r = dictionary_page_header->deserialize(dec); !r.has_value()) return r.error();
1025 break;
1026 }
1027 case 8: {
1028 if (ftype != compact_type::STRUCT) {
1030 "PageHeader.data_page_header_v2: expected STRUCT"};
1031 }
1032 data_page_header_v2.emplace();
1033 if (auto r = data_page_header_v2->deserialize(dec); !r.has_value()) return r.error();
1034 break;
1035 }
1036 default:
1037 dec.skip_field(ftype);
1038 break;
1039 }
1040 }
1041 dec.end_struct();
1042 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "PageHeader: decoder error"};
1043 return {};
1044 }
1045};
1046
1047// ============================================================================
1048// § 3 Column metadata (parquet.thrift ColumnMetaData, fields 1-12)
1049// ============================================================================
1050
1058 std::vector<Encoding> encodings;
1059 std::vector<std::string> path_in_schema;
1061 int64_t num_values = 0;
1064 std::optional<std::vector<KeyValue>> key_value_metadata;
1065 int64_t data_page_offset = 0;
1066 std::optional<int64_t> index_page_offset;
1067 std::optional<int64_t> dictionary_page_offset;
1068 std::optional<Statistics> statistics;
1069
1070 ColumnMetaData() = default;
1071
1072 void serialize(CompactEncoder& enc) const {
1073 enc.begin_struct();
1075 enc.write_i32(static_cast<int32_t>(type));
1076
1078 enc.write_list_header(compact_type::I32, static_cast<int32_t>(encodings.size()));
1079 for (auto e : encodings) {
1080 enc.write_i32(static_cast<int32_t>(e));
1081 }
1082
1084 enc.write_list_header(compact_type::BINARY, static_cast<int32_t>(path_in_schema.size()));
1085 for (const auto& p : path_in_schema) {
1086 enc.write_string(p);
1087 }
1088
1090 enc.write_i32(static_cast<int32_t>(codec));
1092 enc.write_i64(num_values);
1097
1098 if (key_value_metadata.has_value()) {
1101 static_cast<int32_t>(key_value_metadata->size()));
1102 for (const auto& kv : *key_value_metadata) {
1103 kv.serialize(enc);
1104 }
1105 }
1106
1109
1110 if (index_page_offset.has_value()) {
1113 }
1114 if (dictionary_page_offset.has_value()) {
1117 }
1118 if (statistics.has_value()) {
1120 statistics->serialize(enc);
1121 }
1122
1123 enc.write_stop();
1124 enc.end_struct();
1125 }
1126
1128 dec.begin_struct();
1129 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "ColumnMetaData: begin_struct failed"};
1130 // Required-field bitmask:
1131 // bit 0 = field 1 (type), bit 1 = field 2 (encodings),
1132 // bit 2 = field 3 (path_in_schema), bit 3 = field 4 (codec),
1133 // bit 4 = field 5 (num_values), bit 5 = field 6 (total_uncompressed_size),
1134 // bit 6 = field 7 (total_compressed_size), bit 7 = field 9 (data_page_offset)
1135 uint32_t seen = 0;
1136 for (;;) {
1137 auto [fid, ftype] = dec.read_field_header();
1138 if (ftype == compact_type::STOP) break;
1139 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "ColumnMetaData: field header error"};
1140 switch (fid) {
1141 case 1:
1142 if (ftype != compact_type::I32) {
1143 return {ErrorCode::THRIFT_DECODE_ERROR, "ColumnMetaData.type: expected I32"};
1144 }
1145 type = static_cast<PhysicalType>(dec.read_i32());
1146 seen |= (1u << 0);
1147 break;
1148 case 2: {
1149 if (ftype != compact_type::LIST) {
1150 return {ErrorCode::THRIFT_DECODE_ERROR, "ColumnMetaData.encodings: expected LIST"};
1151 }
1152 auto [elem_type, count] = dec.read_list_header();
1153 static constexpr int32_t MAX_ENCODINGS = 10000;
1154 if (count < 0 || count > MAX_ENCODINGS) {
1156 "ColumnMetaData.encodings: list exceeds maximum size"};
1157 }
1158 encodings.resize(static_cast<size_t>(count));
1159 for (int32_t i = 0; i < count; ++i) {
1160 encodings[static_cast<size_t>(i)] =
1161 static_cast<Encoding>(dec.read_i32());
1162 }
1163 seen |= (1u << 1);
1164 break;
1165 }
1166 case 3: {
1167 if (ftype != compact_type::LIST) {
1169 "ColumnMetaData.path_in_schema: expected LIST"};
1170 }
1171 auto [elem_type, count] = dec.read_list_header();
1172 static constexpr int32_t MAX_PATH_ELEMS = 10000;
1173 if (count < 0 || count > MAX_PATH_ELEMS) {
1175 "ColumnMetaData.path_in_schema: list exceeds maximum size"};
1176 }
1177 path_in_schema.resize(static_cast<size_t>(count));
1178 for (int32_t i = 0; i < count; ++i) {
1179 path_in_schema[static_cast<size_t>(i)] = dec.read_string();
1180 }
1181 seen |= (1u << 2);
1182 break;
1183 }
1184 case 4:
1185 if (ftype != compact_type::I32) {
1186 return {ErrorCode::THRIFT_DECODE_ERROR, "ColumnMetaData.codec: expected I32"};
1187 }
1188 codec = static_cast<Compression>(dec.read_i32());
1189 seen |= (1u << 3);
1190 break;
1191 case 5:
1192 if (ftype != compact_type::I64) {
1193 return {ErrorCode::THRIFT_DECODE_ERROR, "ColumnMetaData.num_values: expected I64"};
1194 }
1195 num_values = dec.read_i64();
1196 seen |= (1u << 4);
1197 break;
1198 case 6:
1199 if (ftype != compact_type::I64) {
1201 "ColumnMetaData.total_uncompressed_size: expected I64"};
1202 }
1204 seen |= (1u << 5);
1205 break;
1206 case 7:
1207 if (ftype != compact_type::I64) {
1209 "ColumnMetaData.total_compressed_size: expected I64"};
1210 }
1212 seen |= (1u << 6);
1213 break;
1214 case 8: {
1215 if (ftype != compact_type::LIST) {
1217 "ColumnMetaData.key_value_metadata: expected LIST"};
1218 }
1219 auto [elem_type, count] = dec.read_list_header();
1220 static constexpr int32_t MAX_STRUCT_LIST_SIZE = 10000;
1221 if (count < 0 || count > MAX_STRUCT_LIST_SIZE) {
1223 "ColumnMetaData.key_value_metadata: list exceeds maximum size"};
1224 }
1225 key_value_metadata.emplace();
1226 key_value_metadata->resize(static_cast<size_t>(count));
1227 for (int32_t i = 0; i < count; ++i) {
1228 if (auto r = (*key_value_metadata)[static_cast<size_t>(i)].deserialize(dec);
1229 !r.has_value()) {
1230 return r.error();
1231 }
1232 }
1233 break;
1234 }
1235 case 9:
1236 if (ftype != compact_type::I64) {
1238 "ColumnMetaData.data_page_offset: expected I64"};
1239 }
1240 data_page_offset = dec.read_i64();
1241 seen |= (1u << 7);
1242 break;
1243 case 10:
1244 if (ftype != compact_type::I64) {
1246 "ColumnMetaData.index_page_offset: expected I64"};
1247 }
1249 break;
1250 case 11:
1251 if (ftype != compact_type::I64) {
1253 "ColumnMetaData.dictionary_page_offset: expected I64"};
1254 }
1256 break;
1257 case 12: {
1258 if (ftype != compact_type::STRUCT) {
1260 "ColumnMetaData.statistics: expected STRUCT"};
1261 }
1262 statistics.emplace();
1263 if (auto r = statistics->deserialize(dec); !r.has_value()) return r.error();
1264 break;
1265 }
1266 default:
1267 dec.skip_field(ftype);
1268 break;
1269 }
1270 }
1271 dec.end_struct();
1272 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "ColumnMetaData: decoder error"};
1273 // Required-field validation: bits 0-7 (fields 1,2,3,4,5,6,7,9)
1274 if ((seen & 0xFFu) != 0xFFu) {
1276 "ColumnMetaData: missing one or more required fields (1-7, 9)"};
1277 }
1278 return {};
1279 }
1280};
1281
1282// ============================================================================
1283// § 4 Encryption Thrift types (parquet-format 2.9.0, Option A: AES-GCM-V1)
1284//
1285// Canonical typed structs for Parquet Modular Encryption (PME) metadata.
1286// Signet extensions are isolated in key_value_metadata with "signet." prefix.
1287// ============================================================================
1288
// AesGcmV1: three optional members -- aad_prefix (raw bytes) and the
// aad_file_unique / supply_aad_prefix flags.
// NOTE(review): the embedded listing numbers skip 1300, 1304, 1307, 1313 and
// 1337, so some statements are not visible here -- presumably the field-header
// and bool writes, the deserialize signature, and the supply_aad_prefix read.
// Confirm against the real header.
1290struct AesGcmV1 {
1291 std::optional<std::vector<uint8_t>> aad_prefix;
1292 std::optional<bool> aad_file_unique;
1293 std::optional<bool> supply_aad_prefix;
1294
1295 AesGcmV1() = default;
1296
// Encode into `enc`: each member is guarded by has_value(), so unset members
// produce no output; the struct always ends with a stop marker.
1297 void serialize(CompactEncoder& enc) const {
1298 enc.begin_struct();
1299 if (aad_prefix.has_value()) {
1301 enc.write_binary(aad_prefix->data(), aad_prefix->size());
1302 }
1303 if (aad_file_unique.has_value()) {
1305 }
1306 if (supply_aad_prefix.has_value()) {
1308 }
1309 enc.write_stop();
1310 enc.end_struct();
1311 }
1312
// Decode body (signature line not present in this listing). Reads field
// headers until STOP; a wire-type mismatch on a known field id returns
// THRIFT_DECODE_ERROR, unknown field ids are skipped.
1314 dec.begin_struct();
1315 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "AesGcmV1: begin_struct failed"};
1316 for (;;) {
1317 auto [fid, ftype] = dec.read_field_header();
// STOP is tested before the decoder-health check.
1318 if (ftype == compact_type::STOP) break;
1319 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "AesGcmV1: field header error"};
1320 switch (fid) {
1321 case 1:
1322 if (ftype != compact_type::BINARY) {
1323 return {ErrorCode::THRIFT_DECODE_ERROR, "AesGcmV1.aad_prefix: expected BINARY"};
1324 }
1325 aad_prefix = dec.read_binary();
1326 break;
1327 case 2:
// Either bool wire type (BOOL_TRUE or BOOL_FALSE) is accepted.
1328 if (ftype != compact_type::BOOL_TRUE && ftype != compact_type::BOOL_FALSE) {
1329 return {ErrorCode::THRIFT_DECODE_ERROR, "AesGcmV1.aad_file_unique: expected BOOL"};
1330 }
1331 aad_file_unique = dec.read_bool();
1332 break;
1333 case 3:
1334 if (ftype != compact_type::BOOL_TRUE && ftype != compact_type::BOOL_FALSE) {
1335 return {ErrorCode::THRIFT_DECODE_ERROR, "AesGcmV1.supply_aad_prefix: expected BOOL"};
1336 }
1338 break;
1339 default:
1340 dec.skip_field(ftype);
1341 break;
1342 }
1343 }
1344 dec.end_struct();
// Any decoder failure raised while reading values is reported here.
1345 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "AesGcmV1: decoder error"};
1346 return {};
1347 }
1348};
1349
// AesGcmCtrV1 members and methods. The struct's opening line is not present in
// this listing (embedded numbers jump from 1349 to 1352); the name is taken
// from the constructor and the error strings below.
// The visible layout mirrors AesGcmV1: optional aad_prefix bytes plus two
// optional bool flags.
// NOTE(review): listing numbers 1361, 1365, 1368, 1374 and 1398 are also
// absent -- presumably field writes, the deserialize signature and the
// supply_aad_prefix read. Confirm against the real header.
1352 std::optional<std::vector<uint8_t>> aad_prefix;
1353 std::optional<bool> aad_file_unique;
1354 std::optional<bool> supply_aad_prefix;
1355
1356 AesGcmCtrV1() = default;
1357
// Encode into `enc`: unset members are omitted; a stop marker closes the struct.
1358 void serialize(CompactEncoder& enc) const {
1359 enc.begin_struct();
1360 if (aad_prefix.has_value()) {
1362 enc.write_binary(aad_prefix->data(), aad_prefix->size());
1363 }
1364 if (aad_file_unique.has_value()) {
1366 }
1367 if (supply_aad_prefix.has_value()) {
1369 }
1370 enc.write_stop();
1371 enc.end_struct();
1372 }
1373
// Decode body (signature line not present in this listing). Loops over field
// headers until STOP, type-checks fields 1-3, and skips every other field id.
1375 dec.begin_struct();
1376 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "AesGcmCtrV1: begin_struct failed"};
1377 for (;;) {
1378 auto [fid, ftype] = dec.read_field_header();
1379 if (ftype == compact_type::STOP) break;
1380 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "AesGcmCtrV1: field header error"};
1381 switch (fid) {
1382 case 1:
1383 if (ftype != compact_type::BINARY) {
1384 return {ErrorCode::THRIFT_DECODE_ERROR, "AesGcmCtrV1.aad_prefix: expected BINARY"};
1385 }
1386 aad_prefix = dec.read_binary();
1387 break;
1388 case 2:
// Either bool wire type (BOOL_TRUE or BOOL_FALSE) is accepted.
1389 if (ftype != compact_type::BOOL_TRUE && ftype != compact_type::BOOL_FALSE) {
1390 return {ErrorCode::THRIFT_DECODE_ERROR, "AesGcmCtrV1.aad_file_unique: expected BOOL"};
1391 }
1392 aad_file_unique = dec.read_bool();
1393 break;
1394 case 3:
1395 if (ftype != compact_type::BOOL_TRUE && ftype != compact_type::BOOL_FALSE) {
1396 return {ErrorCode::THRIFT_DECODE_ERROR, "AesGcmCtrV1.supply_aad_prefix: expected BOOL"};
1397 }
1399 break;
1400 default:
1401 dec.skip_field(ftype);
1402 break;
1403 }
1404 }
1405 dec.end_struct();
1406 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "AesGcmCtrV1: decoder error"};
1407 return {};
1408 }
1409};
1410
// EncryptionAlgorithm members and methods (name taken from the error strings;
// the struct's opening line is not present in this listing).
// `kind` selects which variant is active and defaults to NONE; each variant
// has its own optional payload.
// NOTE(review): listing numbers 1417, 1423, 1426-1427, 1437, 1447, 1450, 1457
// and 1460 are absent -- presumably the constructor, the field-header writes,
// the AES_GCM_CTR_V1 case label, the deserialize signature, the error-return
// openers and the `kind` assignments. Confirm against the real header.
1413 enum class Kind : int32_t { NONE = 0, AES_GCM_V1 = 1, AES_GCM_CTR_V1 = 2 } kind = Kind::NONE;
1414 std::optional<AesGcmV1> aes_gcm_v1;
1415 std::optional<AesGcmCtrV1> aes_gcm_ctr_v1;
1416
1418
// Encode the active variant. If the matching optional is unset, value_or
// substitutes a default-constructed payload, so a payload struct is still
// written. Kind::NONE writes only the stop marker.
1419 void serialize(CompactEncoder& enc) const {
1420 enc.begin_struct();
1421 switch (kind) {
1422 case Kind::AES_GCM_V1:
1424 aes_gcm_v1.value_or(AesGcmV1{}).serialize(enc);
1425 break;
1428 aes_gcm_ctr_v1.value_or(AesGcmCtrV1{}).serialize(enc);
1429 break;
1430 case Kind::NONE:
1431 break;
1432 }
1433 enc.write_stop();
1434 enc.end_struct();
1435 }
1436
// Decode body (signature line not present in this listing). Field 1 decodes
// into aes_gcm_v1 and field 2 into aes_gcm_ctr_v1; both must arrive as STRUCT.
// Nested decode errors are propagated unchanged; other field ids are skipped.
1438 dec.begin_struct();
1439 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "EncryptionAlgorithm: begin_struct failed"};
1440 for (;;) {
1441 auto [fid, ftype] = dec.read_field_header();
1442 if (ftype == compact_type::STOP) break;
1443 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "EncryptionAlgorithm: field header error"};
1444 switch (fid) {
1445 case 1: {
1446 if (ftype != compact_type::STRUCT) {
1448 "EncryptionAlgorithm.AES_GCM_V1: expected STRUCT"};
1449 }
// emplace() replaces any previous payload with a fresh default before decoding.
1451 aes_gcm_v1.emplace();
1452 if (auto r = aes_gcm_v1->deserialize(dec); !r.has_value()) return r.error();
1453 break;
1454 }
1455 case 2: {
1456 if (ftype != compact_type::STRUCT) {
1458 "EncryptionAlgorithm.AES_GCM_CTR_V1: expected STRUCT"};
1459 }
1461 aes_gcm_ctr_v1.emplace();
1462 if (auto r = aes_gcm_ctr_v1->deserialize(dec); !r.has_value()) return r.error();
1463 break;
1464 }
1465 default:
1466 dec.skip_field(ftype);
1467 break;
1468 }
1469 }
1470 dec.end_struct();
1471 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "EncryptionAlgorithm: decoder error"};
1472 return {};
1473 }
1474};
1475
// EncryptionWithColumnKey members and methods (name taken from the error
// strings; the struct's opening line is not present in this listing).
// Holds a column path as a list of strings plus optional key-metadata bytes.
// NOTE(review): listing numbers 1481, 1485, 1491, 1498, 1508, 1514 and 1525
// are absent -- presumably the constructor, field-header writes, the
// deserialize signature and the error-return openers. Confirm against the
// real header.
1478 std::vector<std::string> path_in_schema;
1479 std::optional<std::vector<uint8_t>> key_metadata;
1480
1482
// Encode: the path is written as a list with BINARY element type, one string
// per entry; key_metadata is written only when set.
1483 void serialize(CompactEncoder& enc) const {
1484 enc.begin_struct();
1486 enc.write_list_header(compact_type::BINARY, static_cast<int32_t>(path_in_schema.size()));
1487 for (const auto& p : path_in_schema) {
1488 enc.write_string(p);
1489 }
1490 if (key_metadata.has_value()) {
1492 enc.write_binary(key_metadata->data(), key_metadata->size());
1493 }
1494 enc.write_stop();
1495 enc.end_struct();
1496 }
1497
// Decode body (signature line not present in this listing). Field 1 must be a
// LIST and field 2 must be BINARY; other field ids are skipped.
1499 dec.begin_struct();
1500 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "EncryptionWithColumnKey: begin_struct failed"};
1501 for (;;) {
1502 auto [fid, ftype] = dec.read_field_header();
1503 if (ftype == compact_type::STOP) break;
1504 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "EncryptionWithColumnKey: field header error"};
1505 switch (fid) {
1506 case 1: {
1507 if (ftype != compact_type::LIST) {
1509 "EncryptionWithColumnKey.path_in_schema: expected LIST"};
1510 }
// elem_type from the list header is not inspected in the visible code.
1511 auto [elem_type, count] = dec.read_list_header();
// The element count is bounded before resize() so the decoded count cannot
// force an oversized allocation.
1512 static constexpr int32_t MAX_PATH = 10000;
1513 if (count < 0 || count > MAX_PATH) {
1515 "EncryptionWithColumnKey.path_in_schema: list exceeds maximum size"};
1516 }
1517 path_in_schema.resize(static_cast<size_t>(count));
1518 for (int32_t i = 0; i < count; ++i) {
1519 path_in_schema[static_cast<size_t>(i)] = dec.read_string();
1520 }
1521 break;
1522 }
1523 case 2:
1524 if (ftype != compact_type::BINARY) {
1526 "EncryptionWithColumnKey.key_metadata: expected BINARY"};
1527 }
1528 key_metadata = dec.read_binary();
1529 break;
1530 default:
1531 dec.skip_field(ftype);
1532 break;
1533 }
1534 }
1535 dec.end_struct();
// Read failures inside the loop are reported here via the decoder state.
1536 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "EncryptionWithColumnKey: decoder error"};
1537 return {};
1538 }
1539};
1540
// ColumnCryptoMetaData members and methods (name taken from the error strings;
// the struct's opening line is not present in this listing).
// Kind distinguishes footer-key encryption (empty payload) from column-key
// encryption (payload in `column_key`).
// NOTE(review): listing numbers 1549, 1553, 1559, 1564-1565, 1574, 1584, 1587,
// 1593 and 1596 are absent -- presumably the enum's closing line with the
// `kind` member, the constructor, field-header writes, the column_key
// serialization, the deserialize signature, the error-return openers and the
// `kind` assignments. Confirm against the real header.
1545 enum class Kind : int32_t {
1546 NONE = 0,
1547 FOOTER_KEY = 1,
1548 COLUMN_KEY = 2,
1550
1551 std::optional<EncryptionWithColumnKey> column_key;
1552
1554
// Encode the active variant; Kind::NONE writes only the stop marker.
1555 void serialize(CompactEncoder& enc) const {
1556 enc.begin_struct();
1557 switch (kind) {
1558 case Kind::FOOTER_KEY:
1560 // EncryptionWithFooterKey: empty struct
1561 enc.begin_struct(); enc.write_stop(); enc.end_struct();
1562 break;
1563 case Kind::COLUMN_KEY:
1566 break;
1567 case Kind::NONE:
1568 break;
1569 }
1570 enc.write_stop();
1571 enc.end_struct();
1572 }
1573
// Decode body (signature line not present in this listing). Fields 1 and 2
// must both be STRUCT; field 1's payload is skipped, field 2 is decoded into
// column_key. Other field ids are skipped.
1575 dec.begin_struct();
1576 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "ColumnCryptoMetaData: begin_struct failed"};
1577 for (;;) {
1578 auto [fid, ftype] = dec.read_field_header();
1579 if (ftype == compact_type::STOP) break;
1580 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "ColumnCryptoMetaData: field header error"};
1581 switch (fid) {
1582 case 1: {
1583 if (ftype != compact_type::STRUCT) {
1585 "ColumnCryptoMetaData.FOOTER_KEY: expected STRUCT"};
1586 }
1588 dec.skip_field(compact_type::STRUCT); // consume empty EncryptionWithFooterKey
1589 break;
1590 }
1591 case 2: {
1592 if (ftype != compact_type::STRUCT) {
1594 "ColumnCryptoMetaData.COLUMN_KEY: expected STRUCT"};
1595 }
// emplace() starts from a fresh payload; nested errors are propagated as-is.
1597 column_key.emplace();
1598 if (auto r = column_key->deserialize(dec); !r.has_value()) return r.error();
1599 break;
1600 }
1601 default:
1602 dec.skip_field(ftype);
1603 break;
1604 }
1605 }
1606 dec.end_struct();
1607 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "ColumnCryptoMetaData: decoder error"};
1608 return {};
1609 }
1610};
1611
// FileCryptoMetaData members and methods (name taken from the error strings;
// the struct's opening line is not present in this listing).
// The decode code below uses a member named `encryption_algorithm`; its
// declaration is not visible here.
// NOTE(review): listing numbers 1612-1618, 1621, 1625-1626, 1628, 1635, 1645
// and 1653 are absent -- presumably doc comments, the struct header, the
// encryption_algorithm member, the constructor, the field 1 write, the
// deserialize signature and the error-return openers. Confirm against the
// real header.
1619 std::optional<std::vector<uint8_t>> key_metadata;
1620
1622
// Encode: the visible part writes key_metadata only when it is set, then the
// stop marker.
1623 void serialize(CompactEncoder& enc) const {
1624 enc.begin_struct();
1627 if (key_metadata.has_value()) {
1629 enc.write_binary(key_metadata->data(), key_metadata->size());
1630 }
1631 enc.write_stop();
1632 enc.end_struct();
1633 }
1634
// Decode body (signature line not present in this listing). Field 1 must be a
// STRUCT and is decoded into encryption_algorithm; field 2 must be BINARY.
// Other field ids are skipped.
1636 dec.begin_struct();
1637 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "FileCryptoMetaData: begin_struct failed"};
1638 for (;;) {
1639 auto [fid, ftype] = dec.read_field_header();
1640 if (ftype == compact_type::STOP) break;
1641 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "FileCryptoMetaData: field header error"};
1642 switch (fid) {
1643 case 1: {
1644 if (ftype != compact_type::STRUCT) {
1646 "FileCryptoMetaData.encryption_algorithm: expected STRUCT"};
1647 }
// Nested decode failures are returned to the caller unchanged.
1648 if (auto r = encryption_algorithm.deserialize(dec); !r.has_value()) return r.error();
1649 break;
1650 }
1651 case 2:
1652 if (ftype != compact_type::BINARY) {
1654 "FileCryptoMetaData.key_metadata: expected BINARY"};
1655 }
1656 key_metadata = dec.read_binary();
1657 break;
1658 default:
1659 dec.skip_field(ftype);
1660 break;
1661 }
1662 }
1663 dec.end_struct();
1664 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "FileCryptoMetaData: decoder error"};
1665 return {};
1666 }
1667};
1668
1669// ============================================================================
1670// § 5 Bloom filter Thrift types (parquet-format 2.9.0)
1671// ============================================================================
1672
// BloomFilterAlgorithm members and methods (name taken from the error strings;
// the struct's opening line is not present in this listing).
// `kind` defaults to BLOCK, the only non-NONE variant.
// NOTE(review): listing numbers 1677, 1682, 1690, 1700 and 1704 are absent --
// presumably the constructor, the field-header write, the deserialize
// signature, the error-return opener and the consumption of the variant's
// payload. Confirm against the real header.
1675 enum class Kind : int32_t { NONE = 0, BLOCK = 1 } kind = Kind::BLOCK;
1676
1678
// Encode: the BLOCK variant is emitted as an empty nested struct; any other
// kind writes only the stop marker.
1679 void serialize(CompactEncoder& enc) const {
1680 enc.begin_struct();
1681 if (kind == Kind::BLOCK) {
1683 // SplitBlockAlgorithm: empty struct
1684 enc.begin_struct(); enc.write_stop(); enc.end_struct();
1685 }
1686 enc.write_stop();
1687 enc.end_struct();
1688 }
1689
// Decode body (signature line not present in this listing). Field 1 must be a
// STRUCT and selects Kind::BLOCK; other field ids are skipped.
1691 dec.begin_struct();
1692 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "BloomFilterAlgorithm: begin_struct failed"};
1693 for (;;) {
1694 auto [fid, ftype] = dec.read_field_header();
1695 if (ftype == compact_type::STOP) break;
1696 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "BloomFilterAlgorithm: field header error"};
1697 switch (fid) {
1698 case 1:
1699 if (ftype != compact_type::STRUCT) {
1701 "BloomFilterAlgorithm.BLOCK: expected STRUCT"};
1702 }
1703 kind = Kind::BLOCK;
1705 break;
1706 default:
1707 dec.skip_field(ftype);
1708 break;
1709 }
1710 }
1711 dec.end_struct();
1712 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "BloomFilterAlgorithm: decoder error"};
1713 return {};
1714 }
1715};
1716
// BloomFilterHash members and methods (the struct's opening line is not
// present in this listing; the name comes from the constructor below).
// `kind` defaults to XXHASH, the only non-NONE variant.
// NOTE(review): listing numbers 1726, 1734 and 1746-1747 are absent --
// presumably the field-header write, the deserialize signature, and the
// `kind` assignment plus payload consumption for field 1. Confirm against the
// real header.
1719 enum class Kind : int32_t { NONE = 0, XXHASH = 1 } kind = Kind::XXHASH;
1720
1721 BloomFilterHash() = default;
1722
// Encode: the XXHASH variant is emitted as an empty nested struct; any other
// kind writes only the stop marker.
1723 void serialize(CompactEncoder& enc) const {
1724 enc.begin_struct();
1725 if (kind == Kind::XXHASH) {
1727 // XxHash: empty struct
1728 enc.begin_struct(); enc.write_stop(); enc.end_struct();
1729 }
1730 enc.write_stop();
1731 enc.end_struct();
1732 }
1733
// Decode body (signature line not present in this listing). Field 1 must be a
// STRUCT; other field ids are skipped.
1735 dec.begin_struct();
1736 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "BloomFilterHash: begin_struct failed"};
1737 for (;;) {
1738 auto [fid, ftype] = dec.read_field_header();
1739 if (ftype == compact_type::STOP) break;
1740 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "BloomFilterHash: field header error"};
1741 switch (fid) {
1742 case 1:
1743 if (ftype != compact_type::STRUCT) {
1744 return {ErrorCode::THRIFT_DECODE_ERROR, "BloomFilterHash.XXHASH: expected STRUCT"};
1745 }
1748 break;
1749 default:
1750 dec.skip_field(ftype);
1751 break;
1752 }
1753 }
1754 dec.end_struct();
1755 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "BloomFilterHash: decoder error"};
1756 return {};
1757 }
1758};
1759
// BloomFilterCompression members and methods (name taken from the error
// strings; the struct's opening line is not present in this listing).
// `kind` defaults to UNCOMPRESSED, the only non-NONE variant.
// NOTE(review): listing numbers 1764, 1769, 1777, 1787 and 1790-1791 are
// absent -- presumably the constructor, the field-header write, the
// deserialize signature, the error-return opener, and the `kind` assignment
// plus payload consumption. Confirm against the real header.
1762 enum class Kind : int32_t { NONE = 0, UNCOMPRESSED = 1 } kind = Kind::UNCOMPRESSED;
1763
1765
// Encode: the UNCOMPRESSED variant is emitted as an empty nested struct; any
// other kind writes only the stop marker.
1766 void serialize(CompactEncoder& enc) const {
1767 enc.begin_struct();
1768 if (kind == Kind::UNCOMPRESSED) {
1770 // BloomFilterUncompressed: empty struct
1771 enc.begin_struct(); enc.write_stop(); enc.end_struct();
1772 }
1773 enc.write_stop();
1774 enc.end_struct();
1775 }
1776
// Decode body (signature line not present in this listing). Field 1 must be a
// STRUCT; other field ids are skipped.
1778 dec.begin_struct();
1779 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "BloomFilterCompression: begin_struct failed"};
1780 for (;;) {
1781 auto [fid, ftype] = dec.read_field_header();
1782 if (ftype == compact_type::STOP) break;
1783 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "BloomFilterCompression: field header error"};
1784 switch (fid) {
1785 case 1:
1786 if (ftype != compact_type::STRUCT) {
1788 "BloomFilterCompression.UNCOMPRESSED: expected STRUCT"};
1789 }
1792 break;
1793 default:
1794 dec.skip_field(ftype);
1795 break;
1796 }
1797 }
1798 dec.end_struct();
1799 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "BloomFilterCompression: decoder error"};
1800 return {};
1801 }
1802};
1803
// BloomFilterHeader members and methods (name taken from the error strings;
// the struct's opening line is not present in this listing).
// The code below uses members named `algorithm`, `hash` and `compression`;
// only `num_bytes` is declared in the visible text.
// NOTE(review): listing numbers 1804-1807, 1809-1811, 1813, 1817, 1819, 1821,
// 1823-1824 and 1829 are absent -- presumably the struct header, the three
// nested members, the constructor, the field-header writes, the compression
// serialization and the deserialize signature. Confirm against the real header.
1808 int32_t num_bytes = 0;
1812
1814
// Encode: writes num_bytes, then the algorithm and hash sub-structs, then the
// stop marker (the compression write is not visible in this listing).
1815 void serialize(CompactEncoder& enc) const {
1816 enc.begin_struct();
1818 enc.write_i32(num_bytes);
1820 algorithm.serialize(enc);
1822 hash.serialize(enc);
1825 enc.write_stop();
1826 enc.end_struct();
1827 }
1828
// Decode body (signature line not present in this listing). Field 1 must be
// I32; fields 2-4 must be STRUCT and are delegated to the nested decoders,
// whose errors are returned unchanged. Other field ids are skipped.
// No check that all four fields were seen is visible here.
1830 dec.begin_struct();
1831 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "BloomFilterHeader: begin_struct failed"};
1832 for (;;) {
1833 auto [fid, ftype] = dec.read_field_header();
1834 if (ftype == compact_type::STOP) break;
1835 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "BloomFilterHeader: field header error"};
1836 switch (fid) {
1837 case 1:
1838 if (ftype != compact_type::I32) {
1839 return {ErrorCode::THRIFT_DECODE_ERROR, "BloomFilterHeader.num_bytes: expected I32"};
1840 }
1841 num_bytes = dec.read_i32();
1842 break;
1843 case 2: {
1844 if (ftype != compact_type::STRUCT) {
1845 return {ErrorCode::THRIFT_DECODE_ERROR, "BloomFilterHeader.algorithm: expected STRUCT"};
1846 }
1847 if (auto r = algorithm.deserialize(dec); !r.has_value()) return r.error();
1848 break;
1849 }
1850 case 3: {
1851 if (ftype != compact_type::STRUCT) {
1852 return {ErrorCode::THRIFT_DECODE_ERROR, "BloomFilterHeader.hash: expected STRUCT"};
1853 }
1854 if (auto r = hash.deserialize(dec); !r.has_value()) return r.error();
1855 break;
1856 }
1857 case 4: {
1858 if (ftype != compact_type::STRUCT) {
1859 return {ErrorCode::THRIFT_DECODE_ERROR, "BloomFilterHeader.compression: expected STRUCT"};
1860 }
1861 if (auto r = compression.deserialize(dec); !r.has_value()) return r.error();
1862 break;
1863 }
1864 default:
1865 dec.skip_field(ftype);
1866 break;
1867 }
1868 }
1869 dec.end_struct();
1870 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "BloomFilterHeader: decoder error"};
1871 return {};
1872 }
1873};
1874
1875// ============================================================================
1876// § 6 Column chunk (parquet.thrift ColumnChunk, fields 1-13)
1877// Field 4 (ColumnCryptoMetaData) added per parquet-format 2.9.0.
1878// ============================================================================
1879
// ColumnChunk members and methods (the struct's opening line is not present in
// this listing; the name comes from the constructor below).
// Every member except file_offset is optional; file_offset defaults to 0.
// NOTE(review): many listing numbers inside this block are absent (for example
// 1902, 1905-1906, 1916-1917, 1943, 1981, 1984) -- presumably the field-header
// writes, the file_offset write, the optional offset/length writes and reads,
// the deserialize signature and the error-return openers. Confirm against the
// real header.
1885 std::optional<std::string> file_path;
1886 int64_t file_offset = 0;
1887 std::optional<ColumnMetaData> meta_data;
1888 std::optional<ColumnCryptoMetaData> crypto_metadata;
1889 // fields 5-7 skipped (encrypted_column_metadata, offset_index_offset (legacy), etc.)
1890 std::optional<int64_t> bloom_filter_offset;
1891 std::optional<int32_t> bloom_filter_length;
1892 std::optional<int64_t> column_index_offset;
1893 std::optional<int32_t> column_index_length;
1894 std::optional<int64_t> offset_index_offset;
1895 std::optional<int32_t> offset_index_length;
1896
1897 ColumnChunk() = default;
1898
// Encode: each optional member is guarded by has_value(), so unset members
// produce no output; the struct ends with a stop marker.
1899 void serialize(CompactEncoder& enc) const {
1900 enc.begin_struct();
1901 if (file_path.has_value()) {
1903 enc.write_string(*file_path);
1904 }
1907 if (meta_data.has_value()) {
1909 meta_data->serialize(enc);
1910 }
1911 if (crypto_metadata.has_value()) {
1913 crypto_metadata->serialize(enc);
1914 }
1915 if (bloom_filter_offset.has_value()) {
1918 }
1919 if (bloom_filter_length.has_value()) {
1922 }
1923 if (column_index_offset.has_value()) {
1926 }
1927 if (column_index_length.has_value()) {
1930 }
1931 if (offset_index_offset.has_value()) {
1934 }
1935 if (offset_index_length.has_value()) {
1938 }
1939 enc.write_stop();
1940 enc.end_struct();
1941 }
1942
// Decode body (signature line not present in this listing). Handles field ids
// 1-4 and 8-13 with a wire-type check each; every other id, including 5-7,
// goes to the default branch and is skipped.
// No required-field check is visible after the loop.
1944 dec.begin_struct();
1945 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "ColumnChunk: begin_struct failed"};
1946 for (;;) {
1947 auto [fid, ftype] = dec.read_field_header();
1948 if (ftype == compact_type::STOP) break;
1949 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "ColumnChunk: field header error"};
1950 switch (fid) {
1951 case 1:
1952 if (ftype != compact_type::BINARY) {
1953 return {ErrorCode::THRIFT_DECODE_ERROR, "ColumnChunk.file_path: expected BINARY"};
1954 }
1955 file_path = dec.read_string();
1956 break;
1957 case 2:
1958 if (ftype != compact_type::I64) {
1959 return {ErrorCode::THRIFT_DECODE_ERROR, "ColumnChunk.file_offset: expected I64"};
1960 }
1961 file_offset = dec.read_i64();
1962 break;
1963 case 3: {
1964 if (ftype != compact_type::STRUCT) {
1965 return {ErrorCode::THRIFT_DECODE_ERROR, "ColumnChunk.meta_data: expected STRUCT"};
1966 }
// emplace() starts from a fresh ColumnMetaData; nested errors are propagated.
1967 meta_data.emplace();
1968 if (auto r = meta_data->deserialize(dec); !r.has_value()) return r.error();
1969 break;
1970 }
1971 case 4: {
1972 if (ftype != compact_type::STRUCT) {
1973 return {ErrorCode::THRIFT_DECODE_ERROR, "ColumnChunk.crypto_metadata: expected STRUCT"};
1974 }
1975 crypto_metadata.emplace();
1976 if (auto r = crypto_metadata->deserialize(dec); !r.has_value()) return r.error();
1977 break;
1978 }
// Fields 8-13: offsets are checked as I64 and lengths as I32.
1979 case 8:
1980 if (ftype != compact_type::I64) {
1982 "ColumnChunk.bloom_filter_offset: expected I64"};
1983 }
1985 break;
1986 case 9:
1987 if (ftype != compact_type::I32) {
1989 "ColumnChunk.bloom_filter_length: expected I32"};
1990 }
1992 break;
1993 case 10:
1994 if (ftype != compact_type::I64) {
1996 "ColumnChunk.column_index_offset: expected I64"};
1997 }
1999 break;
2000 case 11:
2001 if (ftype != compact_type::I32) {
2003 "ColumnChunk.column_index_length: expected I32"};
2004 }
2006 break;
2007 case 12:
2008 if (ftype != compact_type::I64) {
2010 "ColumnChunk.offset_index_offset: expected I64"};
2011 }
2013 break;
2014 case 13:
2015 if (ftype != compact_type::I32) {
2017 "ColumnChunk.offset_index_length: expected I32"};
2018 }
2020 break;
2021 default:
2022 dec.skip_field(ftype); // includes fields 5-7
2023 break;
2024 }
2025 }
2026 dec.end_struct();
2027 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "ColumnChunk: decoder error"};
2028 return {};
2029 }
2030};
2031
2032// ============================================================================
2033// § 7 Ordering family (parquet-format 2.9.0)
2034// ============================================================================
2035
// Sort-order enumeration stored as int32_t with explicit values 0, 1 and 2.
// No use of this enum is visible in this part of the listing.
2037enum class SortOrder : int32_t {
2038 SIGNED = 0,
2039 UNSIGNED = 1,
2040 UNKNOWN = 2
2041};
2042
// ColumnOrder members and methods (the struct's opening line is not present in
// this listing; the name comes from the constructor below).
// `kind` defaults to TYPE_ORDER, the only non-NONE variant.
// NOTE(review): listing numbers 2055, 2063 and 2075 are absent -- presumably
// the field-header write, the deserialize signature and the `kind` assignment.
// Confirm against the real header.
2048 enum class Kind : int32_t { NONE = 0, TYPE_ORDER = 1 } kind = Kind::TYPE_ORDER;
2049
2050 ColumnOrder() = default;
2051
// Encode: the TYPE_ORDER variant is emitted as an empty nested struct; any
// other kind writes only the stop marker.
2052 void serialize(CompactEncoder& enc) const {
2053 enc.begin_struct();
2054 if (kind == Kind::TYPE_ORDER) {
2056 // TypeDefinedOrder: empty struct
2057 enc.begin_struct(); enc.write_stop(); enc.end_struct();
2058 }
2059 enc.write_stop();
2060 enc.end_struct();
2061 }
2062
// Decode body (signature line not present in this listing). Field 1 must be a
// STRUCT whose body is skipped; other field ids are skipped too.
2064 dec.begin_struct();
2065 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "ColumnOrder: begin_struct failed"};
2066 for (;;) {
2067 auto [fid, ftype] = dec.read_field_header();
2068 if (ftype == compact_type::STOP) break;
2069 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "ColumnOrder: field header error"};
2070 switch (fid) {
2071 case 1:
2072 if (ftype != compact_type::STRUCT) {
2073 return {ErrorCode::THRIFT_DECODE_ERROR, "ColumnOrder.TYPE_ORDER: expected STRUCT"};
2074 }
2076 dec.skip_field(compact_type::STRUCT); // consume empty TypeDefinedOrder body
2077 break;
2078 default:
2079 dec.skip_field(ftype);
2080 break;
2081 }
2082 }
2083 dec.end_struct();
2084 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "ColumnOrder: decoder error"};
2085 return {};
2086 }
2087};
2088
// SortingColumn members and methods (the struct's opening line is not present
// in this listing; the name comes from the constructors below).
// Defaults: column_idx 0, descending false, nulls_first true.
// NOTE(review): listing numbers 2103, 2105-2106 and 2111 are absent --
// presumably the field 1 header write, the two bool field writes and the
// deserialize signature. Confirm against the real header.
2093 int32_t column_idx = 0;
2094 bool descending = false;
2095 bool nulls_first = true;
2096
2097 SortingColumn() = default;
// Convenience constructor: idx -> column_idx, desc -> descending,
// nf -> nulls_first.
2098 SortingColumn(int32_t idx, bool desc, bool nf)
2099 : column_idx(idx), descending(desc), nulls_first(nf) {}
2100
// Encode: the visible part writes column_idx and then the stop marker.
2101 void serialize(CompactEncoder& enc) const {
2102 enc.begin_struct();
2104 enc.write_i32(column_idx);
2107 enc.write_stop();
2108 enc.end_struct();
2109 }
2110
// Decode body (signature line not present in this listing). Field 1 must be
// I32; fields 2 and 3 accept either bool wire type. Other ids are skipped.
// A field absent from the input leaves the member at its current value.
2112 dec.begin_struct();
2113 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "SortingColumn: begin_struct failed"};
2114 for (;;) {
2115 auto [fid, ftype] = dec.read_field_header();
2116 if (ftype == compact_type::STOP) break;
2117 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "SortingColumn: field header error"};
2118 switch (fid) {
2119 case 1:
2120 if (ftype != compact_type::I32) {
2121 return {ErrorCode::THRIFT_DECODE_ERROR, "SortingColumn.column_idx: expected I32"};
2122 }
2123 column_idx = dec.read_i32();
2124 break;
2125 case 2:
2126 if (ftype != compact_type::BOOL_TRUE && ftype != compact_type::BOOL_FALSE) {
2127 return {ErrorCode::THRIFT_DECODE_ERROR, "SortingColumn.descending: expected BOOL"};
2128 }
2129 descending = dec.read_bool();
2130 break;
2131 case 3:
2132 if (ftype != compact_type::BOOL_TRUE && ftype != compact_type::BOOL_FALSE) {
2133 return {ErrorCode::THRIFT_DECODE_ERROR, "SortingColumn.nulls_first: expected BOOL"};
2134 }
2135 nulls_first = dec.read_bool();
2136 break;
2137 default:
2138 dec.skip_field(ftype);
2139 break;
2140 }
2141 }
2142 dec.end_struct();
2143 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "SortingColumn: decoder error"};
2144 return {};
2145 }
2146};
2147
2148// ============================================================================
2149// § 8 Row group and file metadata (existing, updated for parquet-format 2.9.0)
2150// ============================================================================
2151
// RowGroup: a list of column chunks plus total_byte_size, num_rows and a list
// of sorting columns.
// NOTE(review): listing numbers 2166, 2172-2174, 2178-2179, 2190, 2205 and
// 2236 are absent -- presumably the field-header writes, the total_byte_size
// write, the sorting-columns list header opener, the deserialize signature and
// the error-return openers. Confirm against the real header.
2156struct RowGroup {
2157 std::vector<ColumnChunk> columns;
2158 int64_t total_byte_size = 0;
2159 int64_t num_rows = 0;
2160 std::vector<SortingColumn> sorting_columns;
2161
2162 RowGroup() = default;
2163
// Encode: columns are written as a STRUCT list; sorting_columns is emitted
// only when non-empty.
2164 void serialize(CompactEncoder& enc) const {
2165 enc.begin_struct();
2167 enc.write_list_header(compact_type::STRUCT, static_cast<int32_t>(columns.size()));
2168 for (const auto& col : columns) {
2169 col.serialize(enc);
2170 }
2171
2175 enc.write_i64(num_rows);
2176
2177 if (!sorting_columns.empty()) {
2180 static_cast<int32_t>(sorting_columns.size()));
2181 for (const auto& sc : sorting_columns) {
2182 sc.serialize(enc);
2183 }
2184 }
2185
2186 enc.write_stop();
2187 enc.end_struct();
2188 }
2189
// Decode body (signature line not present in this listing). Fields 1 and 4
// must be LIST, fields 2 and 3 must be I64; other ids are skipped. The first
// element that fails to decode ends the whole decode with that element's error.
2191 dec.begin_struct();
2192 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "RowGroup: begin_struct failed"};
2193 for (;;) {
2194 auto [fid, ftype] = dec.read_field_header();
2195 if (ftype == compact_type::STOP) break;
2196 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "RowGroup: field header error"};
2197 switch (fid) {
2198 case 1: {
2199 if (ftype != compact_type::LIST) {
2200 return {ErrorCode::THRIFT_DECODE_ERROR, "RowGroup.columns: expected LIST"};
2201 }
// elem_type from the list header is not inspected in the visible code.
2202 auto [elem_type, count] = dec.read_list_header();
// The count is bounded before resize() so the decoded value cannot force an
// oversized allocation.
2203 static constexpr int32_t MAX_STRUCT_LIST_SIZE = 10000;
2204 if (count < 0 || count > MAX_STRUCT_LIST_SIZE) {
2206 "RowGroup.columns: list exceeds maximum size"};
2207 }
2208 columns.resize(static_cast<size_t>(count));
2209 for (int32_t i = 0; i < count; ++i) {
2210 if (auto r = columns[static_cast<size_t>(i)].deserialize(dec);
2211 !r.has_value()) {
2212 return r.error();
2213 }
2214 }
2215 break;
2216 }
2217 case 2:
2218 if (ftype != compact_type::I64) {
2219 return {ErrorCode::THRIFT_DECODE_ERROR, "RowGroup.total_byte_size: expected I64"};
2220 }
2221 total_byte_size = dec.read_i64();
2222 break;
2223 case 3:
2224 if (ftype != compact_type::I64) {
2225 return {ErrorCode::THRIFT_DECODE_ERROR, "RowGroup.num_rows: expected I64"};
2226 }
2227 num_rows = dec.read_i64();
2228 break;
2229 case 4: {
2230 if (ftype != compact_type::LIST) {
2231 return {ErrorCode::THRIFT_DECODE_ERROR, "RowGroup.sorting_columns: expected LIST"};
2232 }
2233 auto [elem_type, count] = dec.read_list_header();
// Same bounding scheme as field 1, with its own constant.
2234 static constexpr int32_t MAX_SORT_COLS = 10000;
2235 if (count < 0 || count > MAX_SORT_COLS) {
2237 "RowGroup.sorting_columns: list exceeds maximum size"};
2238 }
2239 sorting_columns.resize(static_cast<size_t>(count));
2240 for (int32_t i = 0; i < count; ++i) {
2241 if (auto r = sorting_columns[static_cast<size_t>(i)].deserialize(dec);
2242 !r.has_value()) {
2243 return r.error();
2244 }
2245 }
2246 break;
2247 }
2248 default:
2249 dec.skip_field(ftype);
2250 break;
2251 }
2252 }
2253 dec.end_struct();
2254 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "RowGroup: decoder error"};
2255 return {};
2256 }
2257};
2258
// FileMetaData members and methods (the struct's opening line is not present
// in this listing; the name comes from the constructor below).
// The code below reads and writes a member named `version`; its declaration is
// not visible here.
// NOTE(review): many listing numbers inside this block are absent (for example
// 2259-2266, 2278, 2281, 2297-2298, 2306-2307, 2323, 2344) -- presumably doc
// comments, the struct header, the version member, the field-header writes,
// the created_by write, the deserialize signature and the error-return
// openers. Confirm against the real header.
2267 std::vector<SchemaElement> schema;
2268 int64_t num_rows = 0;
2269 std::vector<RowGroup> row_groups;
2270 std::optional<std::vector<KeyValue>> key_value_metadata;
2271 std::optional<std::string> created_by;
2272 std::optional<std::vector<ColumnOrder>> column_orders;
2273
2274 FileMetaData() = default;
2275
// Encode: version, schema list, num_rows and row-group list are written
// unconditionally; key_value_metadata, created_by and column_orders are
// guarded by has_value().
2276 void serialize(CompactEncoder& enc) const {
2277 enc.begin_struct();
2279 enc.write_i32(version);
2280
2282 enc.write_list_header(compact_type::STRUCT, static_cast<int32_t>(schema.size()));
2283 for (const auto& elem : schema) {
2284 elem.serialize(enc);
2285 }
2286
2288 enc.write_i64(num_rows);
2289
2291 enc.write_list_header(compact_type::STRUCT, static_cast<int32_t>(row_groups.size()));
2292 for (const auto& rg : row_groups) {
2293 rg.serialize(enc);
2294 }
2295
2296 if (key_value_metadata.has_value()) {
2299 static_cast<int32_t>(key_value_metadata->size()));
2300 for (const auto& kv : *key_value_metadata) {
2301 kv.serialize(enc);
2302 }
2303 }
2304
2305 if (created_by.has_value()) {
2308 }
2309
2310 if (column_orders.has_value()) {
2313 static_cast<int32_t>(column_orders->size()));
2314 for (const auto& co : *column_orders) {
2315 co.serialize(enc);
2316 }
2317 }
2318
2319 enc.write_stop();
2320 enc.end_struct();
2321 }
2322
// Decode body (signature line not present in this listing). Handles field ids
// 1-7 with a wire-type check each; other ids are skipped. Each list count is
// bounded before resize(), and the first element that fails to decode ends
// the whole decode with that element's error.
// No required-field check is visible after the loop.
2324 dec.begin_struct();
2325 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "FileMetaData: begin_struct failed"};
2326 for (;;) {
2327 auto [fid, ftype] = dec.read_field_header();
2328 if (ftype == compact_type::STOP) break;
2329 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "FileMetaData: field header error"};
2330 switch (fid) {
2331 case 1:
2332 if (ftype != compact_type::I32) {
2333 return {ErrorCode::THRIFT_DECODE_ERROR, "FileMetaData.version: expected I32"};
2334 }
2335 version = dec.read_i32();
2336 break;
2337 case 2: {
2338 if (ftype != compact_type::LIST) {
2339 return {ErrorCode::THRIFT_DECODE_ERROR, "FileMetaData.schema: expected LIST"};
2340 }
// elem_type from the list header is not inspected in the visible code.
2341 auto [elem_type, count] = dec.read_list_header();
2342 static constexpr int32_t MAX_STRUCT_LIST_SIZE = 10000;
2343 if (count < 0 || count > MAX_STRUCT_LIST_SIZE) {
2345 "FileMetaData.schema: list exceeds maximum size"};
2346 }
2347 schema.resize(static_cast<size_t>(count));
2348 for (int32_t i = 0; i < count; ++i) {
2349 if (auto r = schema[static_cast<size_t>(i)].deserialize(dec);
2350 !r.has_value()) {
2351 return r.error();
2352 }
2353 }
2354 break;
2355 }
2356 case 3:
2357 if (ftype != compact_type::I64) {
2358 return {ErrorCode::THRIFT_DECODE_ERROR, "FileMetaData.num_rows: expected I64"};
2359 }
2360 num_rows = dec.read_i64();
2361 break;
2362 case 4: {
2363 if (ftype != compact_type::LIST) {
2364 return {ErrorCode::THRIFT_DECODE_ERROR, "FileMetaData.row_groups: expected LIST"};
2365 }
2366 auto [elem_type, count] = dec.read_list_header();
2367 static constexpr int32_t MAX_STRUCT_LIST_SIZE_RG = 10000;
2368 if (count < 0 || count > MAX_STRUCT_LIST_SIZE_RG) {
2370 "FileMetaData.row_groups: list exceeds maximum size"};
2371 }
2372 row_groups.resize(static_cast<size_t>(count));
2373 for (int32_t i = 0; i < count; ++i) {
2374 if (auto r = row_groups[static_cast<size_t>(i)].deserialize(dec);
2375 !r.has_value()) {
2376 return r.error();
2377 }
2378 }
2379 break;
2380 }
2381 case 5: {
2382 if (ftype != compact_type::LIST) {
2384 "FileMetaData.key_value_metadata: expected LIST"};
2385 }
2386 auto [elem_type, count] = dec.read_list_header();
// The key/value list has a larger cap (1'000'000) than the other lists here.
2387 static constexpr int32_t MAX_KV_LIST_SIZE = 1'000'000;
2388 if (count < 0 || count > MAX_KV_LIST_SIZE) {
2390 "FileMetaData.key_value_metadata: list exceeds maximum size"};
2391 }
// emplace() makes the optional engaged even when the decoded list is empty.
2392 key_value_metadata.emplace();
2393 key_value_metadata->resize(static_cast<size_t>(count));
2394 for (int32_t i = 0; i < count; ++i) {
2395 if (auto r = (*key_value_metadata)[static_cast<size_t>(i)].deserialize(dec);
2396 !r.has_value()) {
2397 return r.error();
2398 }
2399 }
2400 break;
2401 }
2402 case 6:
2403 if (ftype != compact_type::BINARY) {
2404 return {ErrorCode::THRIFT_DECODE_ERROR, "FileMetaData.created_by: expected BINARY"};
2405 }
2406 created_by = dec.read_string();
2407 break;
2408 case 7: {
2409 if (ftype != compact_type::LIST) {
2411 "FileMetaData.column_orders: expected LIST"};
2412 }
2413 auto [elem_type, count] = dec.read_list_header();
2414 static constexpr int32_t MAX_COL_ORDERS = 10000;
2415 if (count < 0 || count > MAX_COL_ORDERS) {
2417 "FileMetaData.column_orders: list exceeds maximum size"};
2418 }
2419 column_orders.emplace();
2420 column_orders->resize(static_cast<size_t>(count));
2421 for (int32_t i = 0; i < count; ++i) {
2422 if (auto r = (*column_orders)[static_cast<size_t>(i)].deserialize(dec);
2423 !r.has_value()) {
2424 return r.error();
2425 }
2426 }
2427 break;
2428 }
2429 default:
2430 dec.skip_field(ftype);
2431 break;
2432 }
2433 }
2434 dec.end_struct();
2435 if (!dec.good()) return {ErrorCode::THRIFT_DECODE_ERROR, "FileMetaData: decoder error"};
2436 return {};
2437 }
2438};
2439
2440} // namespace signet::forge::thrift
bool has_value() const
Return true if the result represents success (no error).
Definition error.hpp:255
A lightweight result type that holds either a success value of type T or an Error.
Definition error.hpp:145
Thrift Compact Protocol reader.
Definition compact.hpp:267
void begin_struct()
Push a new field-ID context for reading a nested struct.
Definition compact.hpp:508
void end_struct()
Pop the field-ID context after finishing a nested struct.
Definition compact.hpp:515
FieldHeader read_field_header()
Read a field header.
Definition compact.hpp:285
int64_t read_i64()
Read a 64-bit integer (zigzag + varint64 decode).
Definition compact.hpp:353
int8_t read_i8()
Read an 8-bit signed integer (single raw byte, I8 wire type).
Definition compact.hpp:342
ListHeader read_list_header()
Read a list header. Returns element type and count.
Definition compact.hpp:400
void skip_field(uint8_t thrift_type)
Skip a field without parsing its value.
Definition compact.hpp:427
std::string read_string()
Read a string (varint-length-prefixed UTF-8 bytes).
Definition compact.hpp:380
std::vector< uint8_t > read_binary()
Read raw binary data (varint-length-prefixed bytes).
Definition compact.hpp:390
bool good() const
Returns true if no errors have occurred (no bounds violations).
Definition compact.hpp:531
bool read_bool()
Read a boolean value.
Definition compact.hpp:332
int32_t read_i32()
Read a 32-bit integer (zigzag + varint decode).
Definition compact.hpp:348
Thrift Compact Protocol writer.
Definition compact.hpp:72
void begin_struct()
Push a new field-ID context for a nested struct.
Definition compact.hpp:100
void end_struct()
Pop the field-ID context after finishing a nested struct.
Definition compact.hpp:103
void write_string(const std::string &val)
Write a string as varint-length-prefixed UTF-8 bytes.
Definition compact.hpp:163
void write_field_bool(int16_t field_id, bool val)
Write a bool field where the value is embedded in the field header's type nibble (1 = true, 2 = false).
Definition compact.hpp:120
void write_field(int16_t field_id, uint8_t thrift_type)
Write a field header.
Definition compact.hpp:85
void write_i32(int32_t val)
Write a 32-bit integer as zigzag + varint.
Definition compact.hpp:134
void write_stop()
Write struct stop marker (0x00).
Definition compact.hpp:97
void write_i64(int64_t val)
Write a 64-bit integer as zigzag + varint.
Definition compact.hpp:139
void write_i8(int8_t val)
Write an 8-bit signed integer as a single raw byte (I8 wire type).
Definition compact.hpp:113
void write_binary(const uint8_t *data, size_t len)
Write raw binary data as varint-length-prefixed bytes.
Definition compact.hpp:174
void write_list_header(uint8_t elem_type, int32_t size)
Write a list header.
Definition compact.hpp:185
Thrift Compact Protocol encoder and decoder for Parquet metadata serialization.
constexpr uint8_t STRUCT
Nested struct.
Definition compact.hpp:39
constexpr uint8_t I32
32-bit signed integer (zigzag + varint).
Definition compact.hpp:32
constexpr uint8_t BOOL_FALSE
Boolean false (embedded in field header).
Definition compact.hpp:29
constexpr uint8_t BINARY
Length-prefixed bytes (also used for STRING).
Definition compact.hpp:35
constexpr uint8_t LIST
List container.
Definition compact.hpp:36
constexpr uint8_t STOP
Struct stop marker.
Definition compact.hpp:27
constexpr uint8_t BOOL_TRUE
Boolean true (embedded in field header).
Definition compact.hpp:28
constexpr uint8_t I64
64-bit signed integer (zigzag + varint).
Definition compact.hpp:33
constexpr uint8_t I8
8-bit signed integer.
Definition compact.hpp:30
SortOrder
Sort order for column statistics (parquet.thrift SortOrder enum).
Definition types.hpp:2037
@ SIGNED
Values compared as signed integers or IEEE 754 floats.
@ UNKNOWN
Sort order unknown or inapplicable.
@ UNSIGNED
Values compared as unsigned integers or bytes.
PhysicalType
Parquet physical (storage) types as defined in parquet.thrift.
Definition types.hpp:20
@ BYTE_ARRAY
Variable-length byte sequence (strings, binary).
constexpr int32_t PARQUET_VERSION
Parquet format version written to the file footer.
Definition types.hpp:201
Compression
Parquet compression codecs.
Definition types.hpp:115
@ UNCOMPRESSED
No compression.
ConvertedType
Legacy Parquet converted types for backward compatibility with older readers.
Definition types.hpp:67
@ THRIFT_DECODE_ERROR
The Thrift Compact Protocol decoder encountered invalid or malicious input.
Encoding
Parquet page encoding types.
Definition types.hpp:98
@ RLE
Run-length / bit-packed hybrid (used for booleans and def/rep levels).
@ PLAIN_DICTIONARY
Legacy dictionary encoding (Parquet 1.0).
@ PLAIN
Values stored back-to-back in their native binary layout.
PageType
Parquet page types within a column chunk.
Definition types.hpp:127
@ DATA_PAGE
Data page (Parquet 1.0 format).
Repetition
Parquet field repetition types (nullability / cardinality).
Definition types.hpp:140
AES-GCM-CTR-V1 encryption algorithm parameters (parquet.thrift AesGcmCtrV1).
Definition types.hpp:1351
std::optional< bool > supply_aad_prefix
Caller supplies AAD prefix (field 3).
Definition types.hpp:1354
std::optional< std::vector< uint8_t > > aad_prefix
AAD prefix bytes (field 1).
Definition types.hpp:1352
std::optional< bool > aad_file_unique
Unique AAD per file (field 2).
Definition types.hpp:1353
void serialize(CompactEncoder &enc) const
Definition types.hpp:1358
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:1374
AES-GCM-V1 encryption algorithm parameters (parquet.thrift AesGcmV1).
Definition types.hpp:1290
std::optional< std::vector< uint8_t > > aad_prefix
AAD prefix bytes (field 1).
Definition types.hpp:1291
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:1313
std::optional< bool > aad_file_unique
Unique AAD per file (field 2).
Definition types.hpp:1292
void serialize(CompactEncoder &enc) const
Definition types.hpp:1297
std::optional< bool > supply_aad_prefix
Caller supplies AAD prefix (field 3).
Definition types.hpp:1293
BloomFilterAlgorithm union: BLOCK (SplitBlock) is the only defined algorithm.
Definition types.hpp:1674
enum signet::forge::thrift::BloomFilterAlgorithm::Kind kind
void serialize(CompactEncoder &enc) const
Definition types.hpp:1679
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:1690
BloomFilterCompression union: UNCOMPRESSED (field 1) is the only defined mode.
Definition types.hpp:1761
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:1777
enum signet::forge::thrift::BloomFilterCompression::Kind kind
void serialize(CompactEncoder &enc) const
Definition types.hpp:1766
BloomFilterHash union: XXHASH (field 1) is the only defined hash function.
Definition types.hpp:1718
void serialize(CompactEncoder &enc) const
Definition types.hpp:1723
enum signet::forge::thrift::BloomFilterHash::Kind kind
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:1734
BloomFilterHeader: describes the bloom filter block at a column's bloom_filter_offset.
Definition types.hpp:1807
BloomFilterHash hash
Hash function (field 3).
Definition types.hpp:1810
int32_t num_bytes
Size of the bloom filter in bytes (field 1).
Definition types.hpp:1808
void serialize(CompactEncoder &enc) const
Definition types.hpp:1815
BloomFilterCompression compression
Compression mode (field 4).
Definition types.hpp:1811
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:1829
BloomFilterAlgorithm algorithm
Hash algorithm (field 2).
Definition types.hpp:1809
Parquet column chunk descriptor (parquet.thrift fields 1-13).
Definition types.hpp:1884
std::optional< int64_t > column_index_offset
Column index offset (field 10).
Definition types.hpp:1892
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:1943
std::optional< std::string > file_path
External file path (field 1).
Definition types.hpp:1885
void serialize(CompactEncoder &enc) const
Definition types.hpp:1899
int64_t file_offset
Byte offset in file (field 2).
Definition types.hpp:1886
std::optional< int64_t > bloom_filter_offset
Bloom filter offset (field 8).
Definition types.hpp:1890
std::optional< int64_t > offset_index_offset
Offset index offset (field 12).
Definition types.hpp:1894
std::optional< int32_t > offset_index_length
Offset index byte length (field 13).
Definition types.hpp:1895
std::optional< int32_t > column_index_length
Column index byte length (field 11).
Definition types.hpp:1893
std::optional< ColumnMetaData > meta_data
Inline column metadata (field 3).
Definition types.hpp:1887
std::optional< ColumnCryptoMetaData > crypto_metadata
PME crypto metadata (field 4).
Definition types.hpp:1888
std::optional< int32_t > bloom_filter_length
Bloom filter byte length (field 9).
Definition types.hpp:1891
ColumnCryptoMetaData union: footer-key (field 1) or column-key (field 2) encryption.
Definition types.hpp:1544
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:1574
void serialize(CompactEncoder &enc) const
Definition types.hpp:1555
std::optional< EncryptionWithColumnKey > column_key
Populated when kind == COLUMN_KEY.
Definition types.hpp:1551
@ FOOTER_KEY
EncryptionWithFooterKey (empty struct).
enum signet::forge::thrift::ColumnCryptoMetaData::Kind kind
Parquet column metadata (parquet.thrift fields 1-12).
Definition types.hpp:1056
std::optional< Statistics > statistics
Definition types.hpp:1068
std::optional< int64_t > dictionary_page_offset
Definition types.hpp:1067
std::vector< Encoding > encodings
Definition types.hpp:1058
std::vector< std::string > path_in_schema
Definition types.hpp:1059
std::optional< int64_t > index_page_offset
Definition types.hpp:1066
void serialize(CompactEncoder &enc) const
Definition types.hpp:1072
std::optional< std::vector< KeyValue > > key_value_metadata
Definition types.hpp:1064
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:1127
ColumnOrder union: describes how a column's values are compared for statistics.
Definition types.hpp:2047
void serialize(CompactEncoder &enc) const
Definition types.hpp:2052
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:2063
enum signet::forge::thrift::ColumnOrder::Kind kind
Parquet data page header V2 (parquet.thrift fields 1-7).
Definition types.hpp:828
int32_t num_nulls
Number of null values (field 2).
Definition types.hpp:830
Encoding encoding
Data encoding (field 4).
Definition types.hpp:832
void serialize(CompactEncoder &enc) const
Definition types.hpp:839
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:860
int32_t num_values
Total values including nulls (field 1).
Definition types.hpp:829
int32_t num_rows
Number of rows in this page (field 3).
Definition types.hpp:831
bool effective_is_compressed() const
Effective is_compressed value (defaults to true per Parquet spec if absent).
Definition types.hpp:923
std::optional< bool > is_compressed
Whether values are compressed (field 7, default true).
Definition types.hpp:835
int32_t definition_levels_byte_length
Byte length of def levels (field 5).
Definition types.hpp:833
int32_t repetition_levels_byte_length
Byte length of rep levels (field 6).
Definition types.hpp:834
Parquet data page header V1 (parquet.thrift fields 1-5).
Definition types.hpp:674
int32_t num_values
Number of values (field 1, required).
Definition types.hpp:675
Encoding repetition_level_encoding
Rep level encoding (field 4, required).
Definition types.hpp:678
std::optional< Statistics > statistics
Page statistics (field 5, optional).
Definition types.hpp:679
Encoding definition_level_encoding
Def level encoding (field 3, required).
Definition types.hpp:677
void serialize(CompactEncoder &enc) const
Definition types.hpp:683
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:701
Encoding encoding
Data encoding (field 2, required).
Definition types.hpp:676
DecimalType: fixed-point decimal logical type.
Definition types.hpp:146
int32_t precision
Total number of significant decimal digits.
Definition types.hpp:148
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:163
DecimalType(int32_t s, int32_t p)
Definition types.hpp:151
int32_t scale
Number of digits to the right of the decimal point.
Definition types.hpp:147
void serialize(CompactEncoder &enc) const
Definition types.hpp:153
Parquet dictionary page header (parquet.thrift fields 1-3).
Definition types.hpp:766
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:786
std::optional< bool > is_sorted
Whether entries are sorted (field 3).
Definition types.hpp:769
void serialize(CompactEncoder &enc) const
Definition types.hpp:773
Encoding encoding
Dictionary encoding (field 2).
Definition types.hpp:768
int32_t num_values
Number of dictionary entries (field 1).
Definition types.hpp:767
EncryptionAlgorithm union: AES-GCM-V1 (field 1) or AES-GCM-CTR-V1 (field 2).
Definition types.hpp:1412
std::optional< AesGcmV1 > aes_gcm_v1
Populated when kind == AES_GCM_V1.
Definition types.hpp:1414
enum signet::forge::thrift::EncryptionAlgorithm::Kind kind
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:1437
void serialize(CompactEncoder &enc) const
Definition types.hpp:1419
std::optional< AesGcmCtrV1 > aes_gcm_ctr_v1
Populated when kind == AES_GCM_CTR_V1.
Definition types.hpp:1415
EncryptionWithColumnKey: per-column encryption key binding (parquet.thrift).
Definition types.hpp:1477
std::optional< std::vector< uint8_t > > key_metadata
Serialized key metadata (field 2).
Definition types.hpp:1479
void serialize(CompactEncoder &enc) const
Definition types.hpp:1483
std::vector< std::string > path_in_schema
Schema path of the encrypted column (field 1).
Definition types.hpp:1478
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:1498
FileCryptoMetaData: file-level encryption metadata (parquet.thrift).
Definition types.hpp:1617
EncryptionAlgorithm encryption_algorithm
Encryption algorithm (field 1, required).
Definition types.hpp:1618
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:1635
std::optional< std::vector< uint8_t > > key_metadata
Key wrapping metadata (field 2).
Definition types.hpp:1619
void serialize(CompactEncoder &enc) const
Definition types.hpp:1623
Parquet file metadata (parquet.thrift fields 1-7).
Definition types.hpp:2265
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:2323
std::vector< RowGroup > row_groups
Definition types.hpp:2269
std::optional< std::string > created_by
Definition types.hpp:2271
std::optional< std::vector< KeyValue > > key_value_metadata
Definition types.hpp:2270
void serialize(CompactEncoder &enc) const
Definition types.hpp:2276
std::vector< SchemaElement > schema
Definition types.hpp:2267
std::optional< std::vector< ColumnOrder > > column_orders
Per-column ordering (field 7).
Definition types.hpp:2272
IntType: integer logical type with explicit width and signedness.
Definition types.hpp:96
bool is_signed
True for signed integers; false for unsigned.
Definition types.hpp:98
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:112
IntType(int8_t bw, bool s)
Definition types.hpp:101
int8_t bit_width
Bit width: 8, 16, 32, or 64.
Definition types.hpp:97
void serialize(CompactEncoder &enc) const
Definition types.hpp:103
Parquet KeyValue metadata entry (parquet.thrift field IDs 1-2).
Definition types.hpp:468
KeyValue(std::string k, std::string v)
Definition types.hpp:473
std::optional< std::string > value
Metadata value (field 2, optional).
Definition types.hpp:470
void serialize(CompactEncoder &enc) const
Definition types.hpp:476
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:488
std::string key
Metadata key (field 1, required).
Definition types.hpp:469
LogicalTypeUnion: Thrift wire union for parquet.thrift LogicalType (field 10 of SchemaElement).
Definition types.hpp:249
void serialize(CompactEncoder &enc) const
Definition types.hpp:266
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:304
std::optional< TimestampType > timestamp
Populated when kind == TIMESTAMP.
Definition types.hpp:260
std::optional< DecimalType > decimal
Populated when kind == DECIMAL.
Definition types.hpp:259
@ DECIMAL
DecimalType (field 5 of union).
@ UUID
UUIDType (field 15 of union).
@ STRING
StringType (field 1 of union).
@ TIMESTAMP
TimestampType (field 9 of union).
std::optional< IntType > integer
Populated when kind == INT.
Definition types.hpp:261
enum signet::forge::thrift::LogicalTypeUnion::Kind kind
Parquet page header (parquet.thrift fields 1-8).
Definition types.hpp:933
std::optional< int32_t > crc
Definition types.hpp:937
void serialize(CompactEncoder &enc) const
Definition types.hpp:945
std::optional< DataPageHeaderV2 > data_page_header_v2
Definition types.hpp:941
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:974
std::optional< DictionaryPageHeader > dictionary_page_header
Definition types.hpp:940
std::optional< DataPageHeader > data_page_header
Definition types.hpp:938
Parquet row group (parquet.thrift fields 1-4).
Definition types.hpp:2156
std::vector< ColumnChunk > columns
Column chunks (field 1).
Definition types.hpp:2157
int64_t total_byte_size
Total byte size (field 2).
Definition types.hpp:2158
void serialize(CompactEncoder &enc) const
Definition types.hpp:2164
int64_t num_rows
Number of rows (field 3).
Definition types.hpp:2159
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:2190
std::vector< SortingColumn > sorting_columns
Sort keys (field 4, optional).
Definition types.hpp:2160
Parquet schema element (parquet.thrift fields 1-10).
Definition types.hpp:530
void serialize(CompactEncoder &enc) const
Definition types.hpp:544
std::optional< int32_t > type_length
Type length for FIXED_LEN_BYTE_ARRAY (field 2).
Definition types.hpp:532
std::optional< ConvertedType > converted_type
Legacy converted type (field 6).
Definition types.hpp:536
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:589
std::string name
Column or group name (field 4, required).
Definition types.hpp:534
std::optional< LogicalTypeUnion > logical_type
LogicalType union (field 10, preferred).
Definition types.hpp:540
std::optional< Repetition > repetition_type
REQUIRED/OPTIONAL/REPEATED (field 3).
Definition types.hpp:533
std::optional< int32_t > num_children
Number of children for group nodes (field 5).
Definition types.hpp:535
std::optional< int32_t > scale
Decimal scale (field 7).
Definition types.hpp:537
std::optional< int32_t > precision
Decimal precision (field 8).
Definition types.hpp:538
std::optional< int32_t > field_id
Field ID for nested type evolution (field 9).
Definition types.hpp:539
std::optional< PhysicalType > type
Physical type (field 1, absent for group nodes).
Definition types.hpp:531
SortingColumn: describes sort key for a column within a RowGroup.
Definition types.hpp:2092
SortingColumn(int32_t idx, bool desc, bool nf)
Definition types.hpp:2098
bool nulls_first
True if nulls sort before non-null values (field 3).
Definition types.hpp:2095
void serialize(CompactEncoder &enc) const
Definition types.hpp:2101
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:2111
int32_t column_idx
Zero-based column index within the schema (field 1).
Definition types.hpp:2093
bool descending
True for descending sort order (field 2).
Definition types.hpp:2094
Parquet column statistics (parquet.thrift fields 1-6).
Definition types.hpp:369
std::optional< std::string > min
Old-style min (field 2, deprecated).
Definition types.hpp:371
std::optional< std::string > max_value
New-style max value (field 5, preferred).
Definition types.hpp:374
std::optional< int64_t > null_count
Number of null values (field 3).
Definition types.hpp:372
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:409
void serialize(CompactEncoder &enc) const
Definition types.hpp:379
std::optional< int64_t > distinct_count
Approximate distinct count (field 4).
Definition types.hpp:373
std::optional< std::string > min_value
New-style min value (field 6, preferred).
Definition types.hpp:375
std::optional< std::string > max
Old-style max (field 1, deprecated).
Definition types.hpp:370
Time unit discriminator for TimestampType (parquet.thrift TimeUnit union).
Definition types.hpp:48
enum signet::forge::thrift::TimeUnit::Kind kind
void serialize(CompactEncoder &enc) const
Definition types.hpp:54
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:65
TimestampType: timestamp logical type with UTC adjustment and time unit.
Definition types.hpp:197
TimestampType(bool utc, TimeUnit u)
Definition types.hpp:202
expected< void > deserialize(CompactDecoder &dec)
Definition types.hpp:213
void serialize(CompactEncoder &enc) const
Definition types.hpp:204
TimeUnit unit
Time unit (MILLIS, MICROS, or NANOS).
Definition types.hpp:199
bool is_adjusted_to_utc
True if the timestamp is UTC-normalized.
Definition types.hpp:198
Parquet format enumerations, type traits, and statistics structs.