Signet Forge 0.1.0
C++20 Parquet library with AI-native extensions
DEMO
Loading...
Searching...
No Matches
pme.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright 2026 Johnson Ogundeji
3#pragma once
4
8
9// ---------------------------------------------------------------------------
10// pme.hpp -- Parquet Modular Encryption (PME) orchestrator
11//
12// This is the main interface for encrypting and decrypting Parquet file
13// components according to the Parquet Modular Encryption specification.
14//
15// PME encrypts individual Parquet modules (footer, column metadata, data
16// pages) independently, each with its own key and AAD context. This allows:
17//
18// - Different columns to use different keys (column-level access control)
19// - Footer encryption or signed-plaintext footer
20// - Per-module AAD binding (prevents ciphertext transplant attacks)
21//
22// Wire format for encrypted modules:
23// [1 byte: IV size] [IV bytes] [ciphertext (+ GCM tag if applicable)]
24//
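// AAD construction (selected by EncryptionConfig::aad_format):
//   legacy:      aad_prefix + '\0' + module_type_byte + '\0' + extra
//   SPEC_BINARY: aad_prefix + module_type_byte, followed (for module types >= 1)
//                by row-group, column and page ordinals, each 2 bytes little-endian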
27//
28// Module types:
29// 0 = FOOTER
30// 1 = COLUMN_META
31// 2 = DATA_PAGE
32// 3 = DICT_PAGE
33// 4 = DATA_PAGE_HEADER
34// 5 = COLUMN_META_HEADER
35//
36// Footer encryption always uses AES-GCM (authenticated).
37// Column data encryption uses AES-GCM or AES-CTR depending on the
38// EncryptionAlgorithm setting.
39//
40// References:
41// - Apache Parquet Modular Encryption (PARQUET-1178)
42// - https://github.com/apache/parquet-format/blob/master/Encryption.md
43// ---------------------------------------------------------------------------
44
50#include "signet/error.hpp"
51
52#if !defined(SIGNET_ENABLE_COMMERCIAL) || !SIGNET_ENABLE_COMMERCIAL
53#error "signet/crypto/pme.hpp requires SIGNET_ENABLE_COMMERCIAL=ON (AGPL-3.0 commercial tier). See LICENSE_COMMERCIAL."
54#endif
55
56#include <cstddef>
57#include <cstdint>
58#include <cstring>
59#include <random>
60#include <string>
61#include <unordered_map>
62#include <vector>
63
64namespace signet::forge::crypto {
65
66// ===========================================================================
67// PME module type constants
68// ===========================================================================
69
/// Key length in bytes that PME accepts (AES-256).
inline constexpr size_t PME_REQUIRED_KEY_SIZE = 32;

/// AES-128 key length in bytes; recognised only to produce a targeted error message.
inline constexpr size_t PME_AES128_KEY_SIZE = 16;
75
77namespace detail::pme {
78
// Module type bytes mixed into the AAD of each encrypted module.
// `inline constexpr` gives a single definition across all translation units
// that include this header.
inline constexpr uint8_t MODULE_FOOTER             = 0;
inline constexpr uint8_t MODULE_COLUMN_META        = 1;
inline constexpr uint8_t MODULE_DATA_PAGE          = 2;
inline constexpr uint8_t MODULE_DICT_PAGE          = 3;
inline constexpr uint8_t MODULE_DATA_PAGE_HEADER   = 4;
inline constexpr uint8_t MODULE_COLUMN_META_HEADER = 5;
85
/// Build the legacy (text) AAD: prefix + '\0' + module_type + '\0' + extra.
///
/// @param prefix       Caller-supplied AAD prefix, copied verbatim.
/// @param module_type  Module type byte, stored as a single raw char.
/// @param extra        Optional context string appended verbatim after the
///                     second NUL separator.
/// @return The assembled AAD; its length is prefix.size() + 3 + extra.size().
inline std::string build_aad_legacy(const std::string& prefix,
                                    uint8_t module_type,
                                    const std::string& extra = "") {
    std::string aad;
    aad.reserve(prefix.size() + 3 + extra.size());
    aad.append(prefix);
    aad.push_back('\0');
    aad.push_back(static_cast<char>(module_type));
    aad.push_back('\0');
    aad.append(extra);
    return aad;
}
99
103inline std::string build_aad_spec(const std::string& prefix,
104 uint8_t module_type,
105 const std::string& extra = "") {
106 std::string aad;
107 aad.append(prefix);
108 aad.push_back(static_cast<char>(module_type));
109
110 // Parse extra string: "column_name:col_ordinal:rg_ordinal:page_ordinal" (4 parts)
111 // Backward compat: "column_name:rg_ordinal:page_ordinal" (3 parts, col_ord=0)
112 uint16_t rg_ord = 0, col_ord = 0, pg_ord = 0;
113 if (!extra.empty()) {
114 auto colon1 = extra.find(':');
115 if (colon1 != std::string::npos) {
116 auto colon2 = extra.find(':', colon1 + 1);
117 if (colon2 != std::string::npos) {
118 auto colon3 = extra.find(':', colon2 + 1);
119 if (colon3 != std::string::npos) {
120 // 4-part format: name:col_ord:rg_ord:pg_ord
121 try { col_ord = static_cast<uint16_t>(std::stoi(extra.substr(colon1 + 1, colon2 - colon1 - 1))); } catch (...) {}
122 try { rg_ord = static_cast<uint16_t>(std::stoi(extra.substr(colon2 + 1, colon3 - colon2 - 1))); } catch (...) {}
123 try { pg_ord = static_cast<uint16_t>(std::stoi(extra.substr(colon3 + 1))); } catch (...) {}
124 } else {
125 // 3-part format (backward compat): name:rg_ord:pg_ord, col_ord=0
126 try { rg_ord = static_cast<uint16_t>(std::stoi(extra.substr(colon1 + 1, colon2 - colon1 - 1))); } catch (...) {}
127 try { pg_ord = static_cast<uint16_t>(std::stoi(extra.substr(colon2 + 1))); } catch (...) {}
128 }
129 }
130 }
131 }
132
133 auto append_le16 = [&](uint16_t v) {
134 aad.push_back(static_cast<char>(v & 0xFF));
135 aad.push_back(static_cast<char>((v >> 8) & 0xFF));
136 };
137
138 if (module_type >= MODULE_COLUMN_META) {
139 append_le16(rg_ord);
140 append_le16(col_ord);
141 append_le16(pg_ord);
142 }
143
144 return aad;
145}
146
148inline std::string build_aad(const EncryptionConfig& config,
149 const std::string& prefix,
150 uint8_t module_type,
151 const std::string& extra = "") {
152 if (config.aad_format == EncryptionConfig::AadFormat::SPEC_BINARY) {
153 return build_aad_spec(prefix, module_type, extra);
154 }
155 return build_aad_legacy(prefix, module_type, extra);
156}
157
163inline std::string pme_key_size_error(size_t actual_size,
164 const std::string& context) {
165 if (actual_size == PME_AES128_KEY_SIZE) {
166 return "PME: AES-128 (16-byte) keys detected for " + context
167 + ". Signet Forge requires AES-256 (32-byte) keys per "
168 "NIST SP 800-131A. See Gap P-7 for AES-128 interop roadmap.";
169 }
170 return "PME: invalid key size (" + std::to_string(actual_size)
171 + " bytes) for " + context
172 + ". Expected " + std::to_string(PME_REQUIRED_KEY_SIZE)
173 + " bytes (AES-256).";
174}
175
176} // namespace detail::pme
178
179// ===========================================================================
180// FileEncryptor -- Encrypts Parquet file components
181// ===========================================================================
182
197public:
201 : config_(config) {
202 // Gap P-8: Build O(1) column key lookup cache from O(n) vector
203 for (const auto& ck : config_.column_keys) {
204 key_cache_[ck.column_name] = &ck.key;
205 }
206 }
207
208 // -----------------------------------------------------------------------
209 // encrypt_footer -- Encrypt the serialized FileMetaData
210 //
211 // Always uses AES-GCM regardless of the algorithm setting, because the
212 // footer requires authenticated encryption (tamper detection).
213 //
214 // AAD = aad_prefix + '\0' + MODULE_FOOTER + '\0'
215 //
216 // Output format:
217 // [1 byte: IV_SIZE(12)] [12 bytes: IV] [ciphertext + 16-byte GCM tag]
218 // -----------------------------------------------------------------------
226 const uint8_t* footer_data, size_t size) const {
227
228 auto license = commercial::require_feature("PME encrypt_footer");
229 if (!license) return license.error();
230
232 config_.algorithm, config_.footer_key);
233 if (config_.footer_key.size() != cipher->key_size()) {
235 detail::pme::pme_key_size_error(
236 config_.footer_key.size(), "footer")};
237 }
238
239 // Build AAD
240 std::string aad = build_aad(config_.aad_prefix,
241 detail::pme::MODULE_FOOTER);
242
243 // Encrypt with ICipher (GCM — footer is always authenticated)
244 return cipher->encrypt(footer_data, size, aad);
245 }
246
247 // -----------------------------------------------------------------------
248 // encrypt_column_page -- Encrypt a column data page
249 //
250 // Uses AES-GCM when algorithm is AES_GCM_V1, AES-CTR when AES_GCM_CTR_V1.
251 //
252 // AAD = aad_prefix + '\0' + MODULE_DATA_PAGE + '\0'
253 // + column_name + ':' + row_group_ordinal + ':' + page_ordinal
254 //
255 // The AAD binds the ciphertext to its location in the file, preventing
256 // page reordering or transplant attacks.
257 // -----------------------------------------------------------------------
270 const uint8_t* page_data, size_t size,
271 const std::string& column_name,
272 int32_t row_group_ordinal,
273 int32_t page_ordinal) const {
274
275 auto license = commercial::require_feature("PME encrypt_column_page");
276 if (!license) return license.error();
277
278 const auto& key = get_column_key(column_name);
279 if (key.empty()) {
280 // No key for this column -- pass through unencrypted
281 return std::vector<uint8_t>(page_data, page_data + size);
282 }
283
284 auto cipher = CipherFactory::create_column_cipher(config_.algorithm, key);
285 if (key.size() != cipher->key_size()) {
287 detail::pme::pme_key_size_error(
288 key.size(), "column '" + column_name + "'")};
289 }
290
291 // Build extra AAD context: column_name:rg_ordinal:page_ordinal
292 std::string extra = column_name + ":"
293 + std::to_string(row_group_ordinal) + ":"
294 + std::to_string(page_ordinal);
295
297 // AES-GCM for column data — include AAD
298 std::string aad = build_aad(config_.aad_prefix,
299 detail::pme::MODULE_DATA_PAGE, extra);
300 return cipher->encrypt(page_data, size, aad);
301 } else {
302 // AES-CTR for column data (AES_GCM_CTR_V1) — no AAD
303 return cipher->encrypt(page_data, size);
304 }
305 }
306
307 // -----------------------------------------------------------------------
308 // encrypt_column_metadata -- Encrypt serialized ColumnMetaData
309 //
310 // Always uses AES-GCM (column metadata requires authentication).
311 //
312 // AAD = aad_prefix + '\0' + MODULE_COLUMN_META + '\0' + column_name
313 // -----------------------------------------------------------------------
321 const uint8_t* metadata, size_t size,
322 const std::string& column_name) const {
323
324 auto license = commercial::require_feature("PME encrypt_column_metadata");
325 if (!license) return license.error();
326
327 const auto& key = get_column_key(column_name);
328 if (key.empty()) {
329 // No key -- pass through
330 return std::vector<uint8_t>(metadata, metadata + size);
331 }
332
333 auto cipher = CipherFactory::create_metadata_cipher(config_.algorithm, key);
334 if (key.size() != cipher->key_size()) {
336 detail::pme::pme_key_size_error(
337 key.size(), "column '" + column_name + "'")};
338 }
339
340 std::string aad = build_aad(config_.aad_prefix,
341 detail::pme::MODULE_COLUMN_META, column_name);
342 return cipher->encrypt(metadata, size, aad);
343 }
344
345 // -----------------------------------------------------------------------
346 // encrypt_dict_page -- Encrypt a dictionary page (Gap P-1)
347 //
348 // PME spec requires dictionary pages to be encrypted with the column key
349 // using the same algorithm as data pages. Module type = MODULE_DICT_PAGE (3).
350 // Without this, dictionary-encoded columns leak all distinct values.
351 //
352 // AAD = aad_prefix + '\0' + MODULE_DICT_PAGE + '\0'
353 // + column_name + ':' + row_group_ordinal + ':0'
354 //
355 // Reference: Apache Parquet Encryption specification (PARQUET-1178)
356 // https://github.com/apache/parquet-format/blob/master/Encryption.md
357 // -----------------------------------------------------------------------
369 const uint8_t* page_data, size_t size,
370 const std::string& column_name,
371 int32_t row_group_ordinal) const {
372
373 auto license = commercial::require_feature("PME encrypt_dict_page");
374 if (!license) return license.error();
375
376 const auto& key = get_column_key(column_name);
377 if (key.empty()) {
378 return std::vector<uint8_t>(page_data, page_data + size);
379 }
380
381 auto cipher = CipherFactory::create_column_cipher(config_.algorithm, key);
382 if (key.size() != cipher->key_size()) {
384 detail::pme::pme_key_size_error(
385 key.size(), "column '" + column_name + "'")};
386 }
387
388 std::string extra = column_name + ":"
389 + std::to_string(row_group_ordinal) + ":0";
390
392 std::string aad = build_aad(config_.aad_prefix,
393 detail::pme::MODULE_DICT_PAGE, extra);
394 return cipher->encrypt(page_data, size, aad);
395 } else {
396 return cipher->encrypt(page_data, size);
397 }
398 }
399
400 // -----------------------------------------------------------------------
401 // encrypt_data_page_header -- Encrypt a data page header (Gap P-2)
402 //
403 // In AES_GCM_CTR_V1 mode, page headers contain min/max statistics that
404 // leak plaintext information about encrypted columns. The PME spec
405 // requires page headers to be GCM-encrypted even when page data uses CTR.
406 // Module type = MODULE_DATA_PAGE_HEADER (4).
407 //
408 // Reference: Apache Parquet Encryption specification (PARQUET-1178)
409 // https://github.com/apache/parquet-format/blob/master/Encryption.md
410 // -----------------------------------------------------------------------
423 const uint8_t* header_data, size_t size,
424 const std::string& column_name,
425 int32_t row_group_ordinal,
426 int32_t page_ordinal) const {
427
428 auto license = commercial::require_feature("PME encrypt_data_page_header");
429 if (!license) return license.error();
430
431 const auto& key = get_column_key(column_name);
432 if (key.empty()) {
433 return std::vector<uint8_t>(header_data, header_data + size);
434 }
435
436 // Page headers always use GCM (authenticated) regardless of algorithm setting
437 auto cipher = CipherFactory::create_metadata_cipher(config_.algorithm, key);
438 if (key.size() != cipher->key_size()) {
440 detail::pme::pme_key_size_error(
441 key.size(), "column '" + column_name + "'")};
442 }
443
444 std::string extra = column_name + ":"
445 + std::to_string(row_group_ordinal) + ":"
446 + std::to_string(page_ordinal);
447 std::string aad = build_aad(config_.aad_prefix,
448 detail::pme::MODULE_DATA_PAGE_HEADER, extra);
449 return cipher->encrypt(header_data, size, aad);
450 }
451
452 // -----------------------------------------------------------------------
453 // encrypt_column_meta_header -- Encrypt a column metadata header (Gap P-2)
454 //
455 // Module type = MODULE_COLUMN_META_HEADER (5). Always GCM-authenticated.
456 // -----------------------------------------------------------------------
464 const uint8_t* header_data, size_t size,
465 const std::string& column_name) const {
466
467 auto license = commercial::require_feature("PME encrypt_column_meta_header");
468 if (!license) return license.error();
469
470 const auto& key = get_column_key(column_name);
471 if (key.empty()) {
472 return std::vector<uint8_t>(header_data, header_data + size);
473 }
474
475 auto cipher = CipherFactory::create_metadata_cipher(config_.algorithm, key);
476 if (key.size() != cipher->key_size()) {
478 detail::pme::pme_key_size_error(
479 key.size(), "column '" + column_name + "'")};
480 }
481
482 std::string aad = build_aad(config_.aad_prefix,
483 detail::pme::MODULE_COLUMN_META_HEADER,
484 column_name);
485 return cipher->encrypt(header_data, size, aad);
486 }
487
488 // -----------------------------------------------------------------------
489 // sign_footer -- Sign plaintext footer with HMAC-SHA256 (Gap P-3)
490 //
491 // In "signed plaintext footer" mode, the footer is NOT encrypted but
492 // is signed with HMAC-SHA256 for tamper detection. This allows metadata
493 // inspection tools to read column names, statistics, and schema without
494 // decryption keys, while still detecting modifications.
495 //
496 // The signing key is derived from the footer key using HKDF:
497 // signing_key = HKDF-Expand(HKDF-Extract(aad_prefix, footer_key),
498 // "signet-pme-footer-sign-v1", 32)
499 //
500 // Output format: [footer_data] [32-byte HMAC-SHA256 signature]
501 //
502 // Reference: Apache Parquet Encryption (PARQUET-1178) §4.2
503 // -----------------------------------------------------------------------
511 const uint8_t* footer_data, size_t size) const {
512
513 auto license = commercial::require_feature("PME sign_footer");
514 if (!license) return license.error();
515
516 if (config_.footer_key.empty() || config_.footer_key.size() != PME_REQUIRED_KEY_SIZE) {
518 detail::pme::pme_key_size_error(
519 config_.footer_key.size(), "footer signing")};
520 }
521
522 // Derive signing key via HKDF
523 auto signing_key = derive_footer_signing_key();
524
525 // Compute HMAC-SHA256(signing_key, aad || footer_data)
526 std::string aad = build_aad(config_.aad_prefix, detail::pme::MODULE_FOOTER);
527 std::vector<uint8_t> msg;
528 msg.reserve(aad.size() + size);
529 msg.insert(msg.end(), aad.begin(), aad.end());
530 msg.insert(msg.end(), footer_data, footer_data + size);
531
532 auto hmac = detail::hkdf::hmac_sha256(
533 signing_key.data(), signing_key.size(),
534 msg.data(), msg.size());
535
536 // Output: footer_data || hmac
537 std::vector<uint8_t> out;
538 out.reserve(size + 32);
539 out.insert(out.end(), footer_data, footer_data + size);
540 out.insert(out.end(), hmac.begin(), hmac.end());
541 return out;
542 }
543
544 // -----------------------------------------------------------------------
545 // wrap_keys -- Wrap all DEKs under their KEKs via KMS (Gap P-5)
546 //
547 // For EXTERNAL key mode with a KMS client configured: wraps the footer
548 // key and all column keys under their respective KEK identifiers.
549 //
550 // Returns a map of key_id → wrapped_dek for storage in file metadata.
551 //
552 // Reference: Parquet PME spec (PARQUET-1178) §3, NIST SP 800-38F
553 // -----------------------------------------------------------------------
558 wrap_keys() const {
559
560 auto license = commercial::require_feature("PME wrap_keys");
561 if (!license) return license.error();
562
563 if (!config_.kms_client) {
565 "PME: KMS client not configured for key wrapping"};
566 }
567
568 std::vector<std::pair<std::string, std::vector<uint8_t>>> result;
569
570 // Wrap footer key
571 if (!config_.footer_key.empty() && !config_.footer_key_id.empty()) {
572 auto wrapped = config_.kms_client->wrap_key(
573 config_.footer_key, config_.footer_key_id);
574 if (!wrapped) return wrapped.error();
575 result.emplace_back(config_.footer_key_id, std::move(wrapped.value()));
576 }
577
578 // Wrap per-column keys
579 for (const auto& ck : config_.column_keys) {
580 if (!ck.key.empty() && !ck.key_id.empty()) {
581 auto wrapped = config_.kms_client->wrap_key(ck.key, ck.key_id);
582 if (!wrapped) return wrapped.error();
583 result.emplace_back(ck.key_id, std::move(wrapped.value()));
584 }
585 }
586
587 // Wrap default column key
588 if (!config_.default_column_key.empty() &&
589 !config_.default_column_key_id.empty()) {
590 auto wrapped = config_.kms_client->wrap_key(
592 if (!wrapped) return wrapped.error();
593 result.emplace_back(config_.default_column_key_id,
594 std::move(wrapped.value()));
595 }
596
597 return result;
598 }
599
604 config_.algorithm,
605 config_.encrypt_footer,
606 config_.aad_prefix
607 };
608 }
609
614 const std::string& column_name) const {
615
617 meta.key_mode = config_.key_mode;
618
619 // Look for a specific column key
620 for (const auto& ck : config_.column_keys) {
621 if (ck.column_name == column_name) {
622 if (config_.key_mode == KeyMode::INTERNAL) {
623 meta.key_material = ck.key;
624 }
625 meta.key_id = ck.key_id;
626 return meta;
627 }
628 }
629
630 // Fall back to default column key
631 if (config_.key_mode == KeyMode::INTERNAL) {
632 meta.key_material = config_.default_column_key;
633 }
634 meta.key_id = config_.default_column_key_id;
635 return meta;
636 }
637
641 [[nodiscard]] bool is_column_encrypted(const std::string& column_name) const {
642 return !get_column_key(column_name).empty();
643 }
644
647 [[nodiscard]] const EncryptionConfig& config() const { return config_; }
648
649private:
650 EncryptionConfig config_;
652 std::unordered_map<std::string, const std::vector<uint8_t>*> key_cache_;
653
657 [[nodiscard]] const std::vector<uint8_t>& get_column_key(
658 const std::string& column_name) const {
659
660 auto it = key_cache_.find(column_name);
661 if (it != key_cache_.end()) {
662 return *it->second;
663 }
664 return config_.default_column_key;
665 }
666
667 // -----------------------------------------------------------------------
668 // derive_footer_signing_key -- HKDF-derived key for signed plaintext footer
669 // -----------------------------------------------------------------------
670 [[nodiscard]] std::array<uint8_t, 32> derive_footer_signing_key() const {
671 static constexpr uint8_t INFO[] = "signet-pme-footer-sign-v1";
672 auto prk = hkdf_extract(
673 reinterpret_cast<const uint8_t*>(config_.aad_prefix.data()),
674 config_.aad_prefix.size(),
675 config_.footer_key.data(),
676 config_.footer_key.size());
677 std::array<uint8_t, 32> key{};
678 (void)hkdf_expand(prk, INFO, sizeof(INFO) - 1, key.data(), key.size());
679 return key;
680 }
681
682 // -----------------------------------------------------------------------
683 // generate_iv -- Generate a random initialization vector
684 //
    // Delegates to detail::cipher::generate_iv(); the platform CSPRNG
    // (arc4random_buf on macOS/BSD, getrandom on Linux) is presumably reached
    // from there via fill_random_bytes() -- confirm in the cipher header.
687 // GCM: 12 bytes (96 bits). CTR: 16 bytes (128 bits).
688 // -----------------------------------------------------------------------
689 [[nodiscard]] static std::vector<uint8_t> generate_iv(size_t iv_size) {
690 return detail::cipher::generate_iv(iv_size);
691 }
692
693 // -----------------------------------------------------------------------
694 // build_aad -- Construct Parquet PME AAD (dispatches on config format)
695 // -----------------------------------------------------------------------
696 [[nodiscard]] std::string build_aad(const std::string& prefix,
697 uint8_t module_type,
698 const std::string& extra = "") const {
699 return detail::pme::build_aad(config_, prefix, module_type, extra);
700 }
701
702 // -----------------------------------------------------------------------
703 // prepend_iv -- Wrap ciphertext with IV header
704 //
705 // Output: [1 byte: iv.size()] [iv bytes] [ciphertext bytes]
706 // -----------------------------------------------------------------------
/// Frame a ciphertext as [1 byte: iv.size()] [iv bytes] [ciphertext bytes].
///
/// @param iv          Initialization vector. Its length is stored in a single
///                    byte, so it must not exceed 255 bytes; a longer IV would
///                    have its length silently truncated by the cast below.
/// @param ciphertext  Bytes appended unchanged after the IV.
/// @return Newly allocated buffer of 1 + iv.size() + ciphertext.size() bytes.
[[nodiscard]] static std::vector<uint8_t> prepend_iv(
        const std::vector<uint8_t>& iv,
        const std::vector<uint8_t>& ciphertext) {
    std::vector<uint8_t> framed;
    framed.reserve(1 + iv.size() + ciphertext.size());
    framed.push_back(static_cast<uint8_t>(iv.size()));
    framed.insert(framed.end(), iv.begin(), iv.end());
    framed.insert(framed.end(), ciphertext.begin(), ciphertext.end());
    return framed;
}
718
719 // -----------------------------------------------------------------------
720 // encrypt_gcm -- AES-GCM encryption with AAD construction (via ICipher)
721 // -----------------------------------------------------------------------
722 [[nodiscard]] expected<std::vector<uint8_t>> encrypt_gcm(
723 const std::vector<uint8_t>& key,
724 uint8_t module_type,
725 const std::string& extra,
726 const uint8_t* data, size_t size) const {
727
728 // Validate module_type range [0..5] per Parquet PME spec (CWE-20)
729 if (module_type > 5) {
730 return Error{ErrorCode::INVALID_ARGUMENT,
731 "PME module_type out of range [0..5] (CWE-20)"};
732 }
733 std::string aad = build_aad(config_.aad_prefix, module_type, extra);
734 auto cipher = CipherFactory::create_metadata_cipher(config_.algorithm, key);
735 return cipher->encrypt(data, size, aad);
736 }
737
738 // -----------------------------------------------------------------------
739 // encrypt_ctr -- AES-CTR encryption (no authentication, via ICipher)
740 //
741 // For AES_GCM_CTR_V1 column data. CTR mode has no AAD -- integrity is
742 // verified by Parquet page checksums.
743 // -----------------------------------------------------------------------
744 [[nodiscard]] expected<std::vector<uint8_t>> encrypt_ctr(
745 const std::vector<uint8_t>& key,
746 const uint8_t* data, size_t size) const {
747
750 return cipher->encrypt(data, size);
751 }
752};
753
754// ===========================================================================
755// FileDecryptor -- Decrypts Parquet file components
756// ===========================================================================
757
770public:
774 : config_(config) {
775 // Gap P-8: Build O(1) column key lookup cache from O(n) vector
776 for (const auto& ck : config_.column_keys) {
777 key_cache_[ck.column_name] = &ck.key;
778 }
779 }
780
781 // -----------------------------------------------------------------------
782 // decrypt_footer -- Decrypt the encrypted FileMetaData
783 //
784 // Reads the IV from the header, then decrypts with AES-GCM.
785 //
786 // Input format:
787 // [1 byte: IV size] [IV bytes] [ciphertext + 16-byte GCM tag]
788 // -----------------------------------------------------------------------
796 const uint8_t* encrypted_footer, size_t size) const {
797
798 auto license = commercial::require_feature("PME decrypt_footer");
799 if (!license) return license.error();
800
802 config_.algorithm, config_.footer_key);
803 if (config_.footer_key.size() != cipher->key_size()) {
805 detail::pme::pme_key_size_error(
806 config_.footer_key.size(), "footer")};
807 }
808
809 // Build AAD (must match what was used during encryption)
810 std::string aad = build_aad(config_.aad_prefix,
811 detail::pme::MODULE_FOOTER);
812
813 // Decrypt with ICipher (GCM — footer is always authenticated)
814 return cipher->decrypt(encrypted_footer, size, aad);
815 }
816
817 // -----------------------------------------------------------------------
818 // decrypt_column_page -- Decrypt a column data page
819 //
820 // Uses AES-GCM or AES-CTR depending on algorithm setting.
821 // -----------------------------------------------------------------------
831 const uint8_t* encrypted_page, size_t size,
832 const std::string& column_name,
833 int32_t row_group_ordinal,
834 int32_t page_ordinal) const {
835
836 auto license = commercial::require_feature("PME decrypt_column_page");
837 if (!license) return license.error();
838
839 const auto& key = get_column_key(column_name);
840 if (key.empty()) {
841 // Not encrypted -- return as-is
842 return std::vector<uint8_t>(encrypted_page, encrypted_page + size);
843 }
844 auto cipher = CipherFactory::create_column_cipher(config_.algorithm, key);
845 if (key.size() != cipher->key_size()) {
847 detail::pme::pme_key_size_error(
848 key.size(), "column '" + column_name + "'")};
849 }
850
851 // Build extra AAD context (must match encryption)
852 std::string extra = column_name + ":"
853 + std::to_string(row_group_ordinal) + ":"
854 + std::to_string(page_ordinal);
855
857 std::string aad = build_aad(config_.aad_prefix,
858 detail::pme::MODULE_DATA_PAGE, extra);
859 return cipher->decrypt(encrypted_page, size, aad);
860 } else {
861 return cipher->decrypt(encrypted_page, size);
862 }
863 }
864
865 // -----------------------------------------------------------------------
866 // decrypt_column_metadata -- Decrypt serialized ColumnMetaData
867 //
868 // Always uses AES-GCM (column metadata is always authenticated).
869 // -----------------------------------------------------------------------
877 const uint8_t* encrypted_metadata, size_t size,
878 const std::string& column_name) const {
879
880 auto license = commercial::require_feature("PME decrypt_column_metadata");
881 if (!license) return license.error();
882
883 const auto& key = get_column_key(column_name);
884 if (key.empty()) {
885 return std::vector<uint8_t>(encrypted_metadata,
886 encrypted_metadata + size);
887 }
888
889 auto cipher = CipherFactory::create_metadata_cipher(config_.algorithm, key);
890 if (key.size() != cipher->key_size()) {
892 detail::pme::pme_key_size_error(
893 key.size(), "column '" + column_name + "'")};
894 }
895
896 std::string aad = build_aad(config_.aad_prefix,
897 detail::pme::MODULE_COLUMN_META, column_name);
898 return cipher->decrypt(encrypted_metadata, size, aad);
899 }
900
901 // -----------------------------------------------------------------------
902 // decrypt_dict_page -- Decrypt a dictionary page (Gap P-1)
903 //
904 // Counterpart to FileEncryptor::encrypt_dict_page().
905 // Uses MODULE_DICT_PAGE (3) for AAD construction.
906 // -----------------------------------------------------------------------
915 const uint8_t* encrypted_page, size_t size,
916 const std::string& column_name,
917 int32_t row_group_ordinal) const {
918
919 auto license = commercial::require_feature("PME decrypt_dict_page");
920 if (!license) return license.error();
921
922 const auto& key = get_column_key(column_name);
923 if (key.empty()) {
924 return std::vector<uint8_t>(encrypted_page, encrypted_page + size);
925 }
926
927 auto cipher = CipherFactory::create_column_cipher(config_.algorithm, key);
928 if (key.size() != cipher->key_size()) {
930 detail::pme::pme_key_size_error(
931 key.size(), "column '" + column_name + "'")};
932 }
933
934 std::string extra = column_name + ":"
935 + std::to_string(row_group_ordinal) + ":0";
936
938 std::string aad = build_aad(config_.aad_prefix,
939 detail::pme::MODULE_DICT_PAGE, extra);
940 return cipher->decrypt(encrypted_page, size, aad);
941 } else {
942 return cipher->decrypt(encrypted_page, size);
943 }
944 }
945
946 // -----------------------------------------------------------------------
947 // decrypt_data_page_header -- Decrypt a data page header (Gap P-2)
948 //
949 // Counterpart to FileEncryptor::encrypt_data_page_header().
950 // Uses MODULE_DATA_PAGE_HEADER (4). Always GCM.
951 // -----------------------------------------------------------------------
961 const uint8_t* encrypted_header, size_t size,
962 const std::string& column_name,
963 int32_t row_group_ordinal,
964 int32_t page_ordinal) const {
965
966 auto license = commercial::require_feature("PME decrypt_data_page_header");
967 if (!license) return license.error();
968
969 const auto& key = get_column_key(column_name);
970 if (key.empty()) {
971 return std::vector<uint8_t>(encrypted_header, encrypted_header + size);
972 }
973
974 auto cipher = CipherFactory::create_metadata_cipher(config_.algorithm, key);
975 if (key.size() != cipher->key_size()) {
977 detail::pme::pme_key_size_error(
978 key.size(), "column '" + column_name + "'")};
979 }
980
981 std::string extra = column_name + ":"
982 + std::to_string(row_group_ordinal) + ":"
983 + std::to_string(page_ordinal);
984 std::string aad = build_aad(config_.aad_prefix,
985 detail::pme::MODULE_DATA_PAGE_HEADER, extra);
986 return cipher->decrypt(encrypted_header, size, aad);
987 }
988
989 // -----------------------------------------------------------------------
990 // decrypt_column_meta_header -- Decrypt a column metadata header (Gap P-2)
991 //
992 // Counterpart to FileEncryptor::encrypt_column_meta_header().
993 // Uses MODULE_COLUMN_META_HEADER (5). Always GCM.
994 // -----------------------------------------------------------------------
1002 const uint8_t* encrypted_header, size_t size,
1003 const std::string& column_name) const {
1004
1005 auto license = commercial::require_feature("PME decrypt_column_meta_header");
1006 if (!license) return license.error();
1007
1008 const auto& key = get_column_key(column_name);
1009 if (key.empty()) {
1010 return std::vector<uint8_t>(encrypted_header,
1011 encrypted_header + size);
1012 }
1013
1014 auto cipher = CipherFactory::create_metadata_cipher(config_.algorithm, key);
1015 if (key.size() != cipher->key_size()) {
1017 detail::pme::pme_key_size_error(
1018 key.size(), "column '" + column_name + "'")};
1019 }
1020
1021 std::string aad = build_aad(config_.aad_prefix,
1022 detail::pme::MODULE_COLUMN_META_HEADER,
1023 column_name);
1024 return cipher->decrypt(encrypted_header, size, aad);
1025 }
1026
1027 // -----------------------------------------------------------------------
1028 // unwrap_keys -- Unwrap DEKs from wrapped blobs via KMS (Gap P-5)
1029 //
1030 // For EXTERNAL key mode: takes a list of (key_id, wrapped_dek) pairs
1031 // read from file metadata and calls the KMS client to unwrap each DEK.
1032 // The unwrapped keys are populated into the config for subsequent
1033 // decrypt operations.
1034 //
1035 // Reference: Parquet PME spec (PARQUET-1178) §3, NIST SP 800-38F
1036 // -----------------------------------------------------------------------
// Unwraps each (key_id, wrapped_dek) pair through config_.kms_client and
// stores the result in the config_ slot whose key id matches.
//
// Parameter:
//   wrapped_keys - list of (key_id, wrapped_dek) pairs.
//
// Result: success (`return {}`) once every pair has been stored and the
// column-key cache rebuilt; otherwise the first error encountered (feature
// check, missing KMS client, KMS unwrap failure, or an unmatched key_id).
//
// NOTE(review): an early return inside the loop leaves keys assigned by
// earlier iterations in config_; there is no rollback.
1045 const std::vector<std::pair<std::string, std::vector<uint8_t>>>& wrapped_keys) {
1046
1047 auto license = commercial::require_feature("PME unwrap_keys");
1048 if (!license) return license.error();
1049
// A KMS client is mandatory here; without one nothing can be unwrapped.
// (The line that opens this error value, original line 1051, is absent
// from this listing.)
1050 if (!config_.kms_client) {
1052 "PME: KMS client not configured for key unwrapping"};
1053 }
1054
1055 for (const auto& [key_id, wrapped_dek] : wrapped_keys) {
1056 auto unwrapped = config_.kms_client->unwrap_key(wrapped_dek, key_id);
1057 if (!unwrapped) return unwrapped.error();
1058
// Slot selection is first-match: footer, then default column key, then the
// per-column list in order.
// NOTE(review): if two slots share the same key_id, only the first matching
// slot is ever written -- confirm key ids are unique per slot.
1059 // Match key_id to the appropriate config slot
1060 if (key_id == config_.footer_key_id) {
1061 config_.footer_key = std::move(unwrapped.value());
1062 } else if (key_id == config_.default_column_key_id) {
1063 config_.default_column_key = std::move(unwrapped.value());
1064 } else {
1065 // Check per-column keys
1066 bool found = false;
1067 for (auto& ck : config_.column_keys) {
1068 if (ck.key_id == key_id) {
1069 ck.key = std::move(unwrapped.value());
1070 found = true;
1071 break;
1072 }
1073 }
// A key_id that matches no slot is reported as an error rather than ignored.
// (Original line 1075, which opens the error value, is absent here.)
1074 if (!found) {
1076 "PME: no config slot for KMS key_id '" + key_id + "'"};
1077 }
1078 }
1079 }
1080
// The cache maps column name -> address of that column's key vector inside
// config_.column_keys; it is cleared and refilled from scratch here.
1081 // Rebuild O(1) cache after key population
1082 key_cache_.clear();
1083 for (const auto& ck : config_.column_keys) {
1084 key_cache_[ck.column_name] = &ck.key;
1085 }
1086
1087 return {};
1088 }
1089
1090 // -----------------------------------------------------------------------
1091 // verify_footer_signature -- Verify signed plaintext footer (Gap P-3)
1092 //
1093 // Counterpart to FileEncryptor::sign_footer(). Splits the signed footer
1094 // into [footer_data] and [32-byte HMAC], recomputes the HMAC, and
1095 // performs constant-time comparison to detect tampering.
1096 //
1097 // Reference: Apache Parquet Encryption (PARQUET-1178) §4.2
1098 // -----------------------------------------------------------------------
// Checks the HMAC appended to a plaintext footer and returns the footer bytes.
//
// Input layout: [footer_data: size - 32 bytes][HMAC: 32 bytes].
//
// Parameters:
//   signed_footer, size - the signed footer buffer and its total length.
//
// Result: a copy of the footer bytes without the trailing 32-byte HMAC when
// the recomputed HMAC matches; otherwise an error (feature check, buffer
// shorter than 32 bytes, wrong footer-key size, or HMAC mismatch).
//
// NOTE(review): the lines that open each error value (original lines 1111,
// 1116 and 1146) are absent from this listing; only the messages are visible.
1105 const uint8_t* signed_footer, size_t size) const {
1106
1107 auto license = commercial::require_feature("PME verify_footer_signature");
1108 if (!license) return license.error();
1109
// The buffer must at least hold the 32-byte HMAC; a footer of length 0 is
// therefore accepted by this check.
1110 if (size < 32) {
1112 "PME: signed footer too short (need at least 32 bytes for HMAC)"};
1113 }
1114
// The footer key must be exactly PME_REQUIRED_KEY_SIZE bytes long.
1115 if (config_.footer_key.empty() || config_.footer_key.size() != PME_REQUIRED_KEY_SIZE) {
1117 detail::pme::pme_key_size_error(
1118 config_.footer_key.size(), "footer signature verification")};
1119 }
1120
// Split the buffer: everything before the last 32 bytes is footer data,
// the last 32 bytes are the HMAC supplied with the file.
1121 size_t footer_size = size - 32;
1122 const uint8_t* footer_data = signed_footer;
1123 const uint8_t* expected_hmac = signed_footer + footer_size;
1124
1125 // Derive the same signing key as FileEncryptor
1126 auto signing_key = derive_footer_signing_key();
1127
// The MAC input is the FOOTER-module AAD followed by the footer bytes.
1128 // Recompute HMAC-SHA256(signing_key, aad || footer_data)
1129 std::string aad = build_aad(config_.aad_prefix, detail::pme::MODULE_FOOTER);
1130 std::vector<uint8_t> msg;
1131 msg.reserve(aad.size() + footer_size);
1132 msg.insert(msg.end(), aad.begin(), aad.end());
1133 msg.insert(msg.end(), footer_data, footer_data + footer_size);
1134
1135 auto computed_hmac = detail::hkdf::hmac_sha256(
1136 signing_key.data(), signing_key.size(),
1137 msg.data(), msg.size());
1138
// All 32 bytes are always XOR-accumulated; the loop has no early exit, so
// the amount of work does not depend on where the first mismatch is.
1139 // Constant-time comparison to prevent timing side-channel
1140 uint8_t diff = 0;
1141 for (size_t i = 0; i < 32; ++i) {
1142 diff |= computed_hmac[i] ^ expected_hmac[i];
1143 }
1144
1145 if (diff != 0) {
1147 "PME: footer signature verification failed — data may be tampered"};
1148 }
1149
// Success: hand back the footer bytes only, without the HMAC trailer.
1150 return std::vector<uint8_t>(footer_data, footer_data + footer_size);
1151 }
1152
// Read-only access to the EncryptionConfig held by this object. The returned
// reference points at the member itself, so it reflects any keys that
// unwrap_keys() has stored since construction.
1155 [[nodiscard]] const EncryptionConfig& config() const { return config_; }
1156
1157private:
// Configuration used by every decrypt/verify call; unwrap_keys() writes
// unwrapped key material into it.
1158 EncryptionConfig config_;
// Column name -> pointer to that column's key bytes. unwrap_keys() fills it
// with addresses of the `key` members inside config_.column_keys, and
// get_column_key() reads it.
// NOTE(review): because the values are raw pointers into config_, they stay
// valid only while those column_keys elements are not relocated -- confirm
// how copies/moves of this object keep the cache consistent.
1160 std::unordered_map<std::string, const std::vector<uint8_t>*> key_cache_;
1161
// Returns the key to use for `column_name`: the cached per-column key when
// the name is present in key_cache_, otherwise config_.default_column_key.
// The result is a reference to existing storage (no copy) and may refer to
// an empty vector; callers in this file test key.empty() before using it.
1163 [[nodiscard]] const std::vector<uint8_t>& get_column_key(
1164 const std::string& column_name) const {
1165
1166 auto it = key_cache_.find(column_name);
1167 if (it != key_cache_.end()) {
// Cache values are pointers, hence the dereference.
1168 return *it->second;
1169 }
// No column-specific entry: fall back to the default column key.
1170 return config_.default_column_key;
1171 }
1172
1173 // -----------------------------------------------------------------------
1174 // derive_footer_signing_key -- HKDF-derived key for signed plaintext footer
1175 // -----------------------------------------------------------------------
// Derives the 32-byte key used for the footer HMAC from the footer key.
//
// Steps: hkdf_extract with salt = bytes of config_.aad_prefix and input key
// material = config_.footer_key, then hkdf_expand with the fixed INFO label
// into a 32-byte output.
//
// Returns the derived key by value.
1176 [[nodiscard]] std::array<uint8_t, 32> derive_footer_signing_key() const {
// Fixed context label for this derivation; changing it changes every
// derived signing key.
1177 static constexpr uint8_t INFO[] = "signet-pme-footer-sign-v1";
1178 auto prk = hkdf_extract(
1179 reinterpret_cast<const uint8_t*>(config_.aad_prefix.data()),
1180 config_.aad_prefix.size(),
1181 config_.footer_key.data(),
1182 config_.footer_key.size());
// Zero-initialised so the buffer has a defined value before expansion.
1183 std::array<uint8_t, 32> key{};
// sizeof(INFO) - 1 leaves out the string literal's terminating NUL.
// The result of hkdf_expand is deliberately discarded.
// NOTE(review): if hkdf_expand can fail for a 32-byte output, the key above
// would be returned unchanged -- confirm that this cannot happen.
1184 (void)hkdf_expand(prk, INFO, sizeof(INFO) - 1, key.data(), key.size());
1185 return key;
1186 }
1187
1188 // -----------------------------------------------------------------------
1189 // IvParsed -- Result of parsing the IV header from encrypted data
1190 // -----------------------------------------------------------------------
// Non-owning view produced by parse_iv_header(): both pointers refer to
// positions inside the buffer that was parsed, so the view must not be used
// after that buffer is gone.
1191 struct IvParsed {
1192 const uint8_t* iv; // Pointer into the input buffer
1193 const uint8_t* ciphertext; // Pointer past the IV
1194 size_t ct_size; // Ciphertext length (including GCM tag)
1195 };
1196
1197 // -----------------------------------------------------------------------
1198 // parse_iv_header -- Extract IV from the encrypted module header
1199 //
1200 // Input format: [1 byte: iv_size] [iv_size bytes: IV] [ciphertext...]
1201 // -----------------------------------------------------------------------
// Splits an encrypted module into its IV and ciphertext without copying.
//
// Parameters:
//   data, size - the encrypted module bytes and their length.
//
// Result: an IvParsed whose pointers refer into `data`, or an
// ENCRYPTION_ERROR when the buffer is empty, the IV-size byte is 0 or
// greater than 16, or the buffer is shorter than 1 + iv_size bytes.
// A buffer that ends right after the IV is accepted and yields ct_size == 0.
1202 [[nodiscard]] static expected<IvParsed> parse_iv_header(
1203 const uint8_t* data, size_t size) {
1204
// Need at least the one-byte IV length field before reading data[0].
1205 if (size < 1) {
1206 return Error{ErrorCode::ENCRYPTION_ERROR,
1207 "PME: encrypted data too short (no IV size byte)"};
1208 }
1209
// Accepted IV lengths are 1..16 bytes.
1210 uint8_t iv_size = data[0];
1211 if (iv_size == 0 || iv_size > 16) {
1212 return Error{ErrorCode::ENCRYPTION_ERROR,
1213 "PME: invalid IV size " + std::to_string(iv_size)};
1214 }
1215
// Header = length byte + IV; the whole header must fit inside the buffer.
1216 size_t header_len = 1 + static_cast<size_t>(iv_size);
1217 if (size < header_len) {
1218 return Error{ErrorCode::ENCRYPTION_ERROR,
1219 "PME: encrypted data too short for IV"};
1220 }
1221
1222 return IvParsed{
1223 data + 1, // iv
1224 data + header_len, // ciphertext
1225 size - header_len // ct_size
1226 };
1227 }
1228
1229 // -----------------------------------------------------------------------
1230 // build_aad -- Same construction as FileEncryptor (dispatches on format)
1231 // -----------------------------------------------------------------------
// Thin member wrapper: forwards to detail::pme::build_aad, supplying this
// object's config_ as the first argument.
//
// Parameters:
//   prefix      - AAD prefix (callers in this class pass config_.aad_prefix).
//   module_type - module type byte (e.g. detail::pme::MODULE_FOOTER).
//   extra       - optional trailing component; defaults to the empty string.
//
// Returns the AAD string produced by detail::pme::build_aad.
1232 [[nodiscard]] std::string build_aad(const std::string& prefix,
1233 uint8_t module_type,
1234 const std::string& extra = "") const {
1235 return detail::pme::build_aad(config_, prefix, module_type, extra);
1236 }
1237
1238 // -----------------------------------------------------------------------
1239 // decrypt_gcm -- AES-GCM decryption with AAD (via ICipher)
1240 // -----------------------------------------------------------------------
// Decrypts `data` with the metadata cipher, authenticating it against the AAD
// built from config_.aad_prefix, `module_type` and `extra`.
//
// Parameters:
//   key         - key bytes handed to the cipher factory.
//   module_type - module type byte; values above 5 are rejected.
//   extra       - additional AAD component passed to build_aad().
//   data, size  - the encrypted bytes and their length.
//
// Result: whatever cipher->decrypt() returns, or INVALID_ARGUMENT when
// module_type is out of range.
//
// NOTE(review): unlike decrypt_column_meta_header(), this helper does not
// compare key.size() with cipher->key_size() -- presumably callers validate
// the key first; confirm.
1241 [[nodiscard]] expected<std::vector<uint8_t>> decrypt_gcm(
1242 const std::vector<uint8_t>& key,
1243 uint8_t module_type,
1244 const std::string& extra,
1245 const uint8_t* data, size_t size) const {
1246
1247 // Validate module_type range [0..5] per Parquet PME spec (CWE-20)
1248 if (module_type > 5) {
1249 return Error{ErrorCode::INVALID_ARGUMENT,
1250 "PME module_type out of range [0..5] (CWE-20)"};
1251 }
1252 std::string aad = build_aad(config_.aad_prefix, module_type, extra);
1253 auto cipher = CipherFactory::create_metadata_cipher(config_.algorithm, key);
1254 return cipher->decrypt(data, size, aad);
1255 }
1256
1257 // -----------------------------------------------------------------------
1258 // decrypt_ctr -- AES-CTR decryption (via ICipher)
1259 // -----------------------------------------------------------------------
// Decrypts `data` with `key` and returns the result of cipher->decrypt().
// No AAD is passed to the cipher on this path.
//
// Parameters:
//   key        - key bytes for the cipher.
//   data, size - the encrypted bytes and their length.
//
// NOTE(review): the statements that create `cipher` (original lines
// 1264-1265) are absent from this listing, so how the cipher is constructed
// cannot be confirmed from here.
1260 [[nodiscard]] expected<std::vector<uint8_t>> decrypt_ctr(
1261 const std::vector<uint8_t>& key,
1262 const uint8_t* data, size_t size) const {
1263
1266 return cipher->decrypt(data, size);
1267 }
1268};
1269
1270} // namespace signet::forge::crypto
AES-256-CTR stream cipher implementation (NIST SP 800-38A).
AES-256-GCM authenticated encryption (NIST SP 800-38D).
Abstract cipher interface, GCM/CTR adapters, CipherFactory, and platform CSPRNG.
Decrypts Parquet modules using the keys from an EncryptionConfig.
Definition pme.hpp:769
expected< std::vector< uint8_t > > decrypt_column_meta_header(const uint8_t *encrypted_header, size_t size, const std::string &column_name) const
Decrypt a column metadata header (always AES-GCM authenticated).
Definition pme.hpp:1001
expected< std::vector< uint8_t > > verify_footer_signature(const uint8_t *signed_footer, size_t size) const
Verify a signed plaintext footer and return the original footer data.
Definition pme.hpp:1104
FileDecryptor(const EncryptionConfig &config)
Construct a decryptor from an encryption configuration.
Definition pme.hpp:773
expected< std::vector< uint8_t > > decrypt_dict_page(const uint8_t *encrypted_page, size_t size, const std::string &column_name, int32_t row_group_ordinal) const
Decrypt a dictionary page.
Definition pme.hpp:914
const EncryptionConfig & config() const
Access the underlying EncryptionConfig.
Definition pme.hpp:1155
expected< void > unwrap_keys(const std::vector< std::pair< std::string, std::vector< uint8_t > > > &wrapped_keys)
Unwrap DEKs from wrapped blobs using the configured KMS client.
Definition pme.hpp:1044
expected< std::vector< uint8_t > > decrypt_data_page_header(const uint8_t *encrypted_header, size_t size, const std::string &column_name, int32_t row_group_ordinal, int32_t page_ordinal) const
Decrypt a data page header (always AES-GCM authenticated).
Definition pme.hpp:960
expected< std::vector< uint8_t > > decrypt_footer(const uint8_t *encrypted_footer, size_t size) const
Decrypt the encrypted FileMetaData (footer).
Definition pme.hpp:795
expected< std::vector< uint8_t > > decrypt_column_page(const uint8_t *encrypted_page, size_t size, const std::string &column_name, int32_t row_group_ordinal, int32_t page_ordinal) const
Decrypt a column data page (AES-GCM or AES-CTR depending on algorithm).
Definition pme.hpp:830
expected< std::vector< uint8_t > > decrypt_column_metadata(const uint8_t *encrypted_metadata, size_t size, const std::string &column_name) const
Decrypt serialized ColumnMetaData (always AES-GCM authenticated).
Definition pme.hpp:876
Encrypts Parquet modules (footer, column metadata, data pages) using the keys and algorithm specified...
Definition pme.hpp:196
EncryptionKeyMetadata column_key_metadata(const std::string &column_name) const
Get key metadata for a column (stored in ColumnChunk.column_crypto_metadata).
Definition pme.hpp:613
expected< std::vector< uint8_t > > encrypt_data_page_header(const uint8_t *header_data, size_t size, const std::string &column_name, int32_t row_group_ordinal, int32_t page_ordinal) const
Encrypt a data page header (always AES-GCM authenticated).
Definition pme.hpp:422
FileEncryptionProperties file_properties() const
Get FileEncryptionProperties for embedding in FileMetaData.
Definition pme.hpp:602
bool is_column_encrypted(const std::string &column_name) const
Check if a column has an encryption key (specific or default).
Definition pme.hpp:641
const EncryptionConfig & config() const
Access the underlying EncryptionConfig.
Definition pme.hpp:647
expected< std::vector< uint8_t > > encrypt_column_page(const uint8_t *page_data, size_t size, const std::string &column_name, int32_t row_group_ordinal, int32_t page_ordinal) const
Encrypt a column data page.
Definition pme.hpp:269
expected< std::vector< uint8_t > > encrypt_dict_page(const uint8_t *page_data, size_t size, const std::string &column_name, int32_t row_group_ordinal) const
Encrypt a dictionary page with the column's encryption key.
Definition pme.hpp:368
expected< std::vector< uint8_t > > encrypt_column_metadata(const uint8_t *metadata, size_t size, const std::string &column_name) const
Encrypt serialized ColumnMetaData with AES-GCM (always authenticated).
Definition pme.hpp:320
expected< std::vector< std::pair< std::string, std::vector< uint8_t > > > > wrap_keys() const
Wrap all DEKs under their KEKs using the configured KMS client.
Definition pme.hpp:558
FileEncryptor(const EncryptionConfig &config)
Construct an encryptor from an encryption configuration.
Definition pme.hpp:200
expected< std::vector< uint8_t > > encrypt_column_meta_header(const uint8_t *header_data, size_t size, const std::string &column_name) const
Encrypt a column metadata header (always AES-GCM authenticated).
Definition pme.hpp:463
expected< std::vector< uint8_t > > sign_footer(const uint8_t *footer_data, size_t size) const
Sign the plaintext footer with HMAC-SHA256 (signed plaintext footer mode).
Definition pme.hpp:510
expected< std::vector< uint8_t > > encrypt_footer(const uint8_t *footer_data, size_t size) const
Encrypt the serialized FileMetaData (footer) with AES-GCM.
Definition pme.hpp:225
A lightweight result type that holds either a success value of type T or an Error.
Definition error.hpp:145
HKDF key derivation (RFC 5869) using HMAC-SHA256.
Key material, encryption configuration, and TLV serialization for Parquet Modular Encryption (PME).
std::vector< uint8_t > generate_iv(size_t iv_size)
Generate a random initialization vector of the specified size.
std::array< uint8_t, 32 > hmac_sha256(const uint8_t *key, size_t key_size, const uint8_t *data, size_t data_size)
HMAC-SHA256 (RFC 2104): keyed hash for HKDF.
Definition hkdf.hpp:44
constexpr size_t PME_AES128_KEY_SIZE
AES-128 key size — detected for interop diagnostics only (Gap P-7).
Definition pme.hpp:74
std::array< uint8_t, 32 > hkdf_extract(const uint8_t *salt, size_t salt_size, const uint8_t *ikm, size_t ikm_size)
HKDF-Extract (RFC 5869 §2.2): Extract a pseudorandom key from input keying material.
Definition hkdf.hpp:107
constexpr size_t PME_REQUIRED_KEY_SIZE
Required AES-256 key size for all PME operations (NIST SP 800-131A).
Definition pme.hpp:71
bool hkdf_expand(const std::array< uint8_t, 32 > &prk, const uint8_t *info, size_t info_size, uint8_t *output, size_t output_size)
HKDF-Expand (RFC 5869 §2.3): Expand PRK to output keying material.
Definition hkdf.hpp:126
@ INTERNAL
Key material stored directly in file metadata (testing/dev).
@ AES_GCM_CTR_V1
AES-256-GCM for footer, AES-256-CTR for column data (Parquet default).
@ AES_GCM_V1
AES-256-GCM for both footer and column data.
@ ENCRYPTION_ERROR
An encryption or decryption operation failed (bad key, tampered ciphertext, PME error).
@ INVALID_ARGUMENT
A caller-supplied argument is outside the valid range or violates a precondition.
Lightweight error value carrying an ErrorCode and a human-readable message.
Definition error.hpp:101
static std::unique_ptr< ICipher > create_footer_cipher(EncryptionAlgorithm, const std::vector< uint8_t > &key)
Create a footer cipher (always authenticated = GCM).
static std::unique_ptr< ICipher > create_metadata_cipher(EncryptionAlgorithm, const std::vector< uint8_t > &key)
Create a metadata cipher (always authenticated = GCM).
static std::unique_ptr< ICipher > create_column_cipher(EncryptionAlgorithm algo, const std::vector< uint8_t > &key)
Create a column data cipher (GCM or CTR based on algorithm).
Top-level configuration structure that drives FileEncryptor / FileDecryptor.
std::vector< uint8_t > default_column_key
Default column key (32 bytes).
std::string default_column_key_id
KMS key identifier for the default column key (EXTERNAL mode).
std::shared_ptr< IKmsClient > kms_client
Optional KMS client for DEK/KEK key wrapping (EXTERNAL key mode).
bool encrypt_footer
If true, the footer is encrypted.
std::vector< uint8_t > footer_key
32-byte AES-256 key for encrypting the Parquet footer (FileMetaData).
KeyMode key_mode
INTERNAL: keys stored in file metadata. EXTERNAL: KMS references only.
std::string aad_prefix
AAD prefix – typically a file identifier or URI.
EncryptionAlgorithm algorithm
Encryption algorithm (GCM everywhere, or GCM-footer + CTR-columns).
std::vector< ColumnKeySpec > column_keys
Per-column key specifications. Columns listed here get their own key.
std::string footer_key_id
KMS key identifier for the footer key (EXTERNAL mode).
Per-key metadata stored alongside encrypted Parquet components.
std::vector< uint8_t > key_material
Raw AES key bytes (INTERNAL mode only).
std::string key_id
KMS key reference (EXTERNAL mode).
KeyMode key_mode
INTERNAL or EXTERNAL key mode.
Stored in the Parquet FileMetaData.encryption_algorithm field.