47#include <unordered_map>
48#include <unordered_set>
107 const std::vector<uint8_t>& dek,
108 const std::string& master_key_id)
const = 0;
116 const std::vector<uint8_t>& wrapped_dek,
117 const std::string& master_key_id)
const = 0;
236namespace detail::meta {
240 dst[0] =
static_cast<uint8_t
>(val);
241 dst[1] =
static_cast<uint8_t
>(val >> 8);
242 dst[2] =
static_cast<uint8_t
>(val >> 16);
243 dst[3] =
static_cast<uint8_t
>(val >> 24);
248 return static_cast<uint32_t
>(src[0])
249 | (
static_cast<uint32_t
>(src[1]) << 8)
250 | (
static_cast<uint32_t
>(src[2]) << 16)
251 | (
static_cast<uint32_t
>(src[3]) << 24);
263 const uint8_t* data, uint32_t len) {
264 size_t pos = buf.size();
265 buf.resize(pos + 8 + len);
269 std::memcpy(buf.data() + pos + 8, data, len);
275 uint32_t tag, int32_t val) {
283 uint32_t tag, uint8_t val) {
289 uint32_t tag,
const std::string& s) {
291 throw std::overflow_error(
"TLV value exceeds maximum length");
294 reinterpret_cast<const uint8_t*
>(s.data()),
295 static_cast<uint32_t
>(s.size()));
301 const std::vector<uint8_t>& blob) {
303 throw std::overflow_error(
"TLV value exceeds maximum length");
305 append_tlv(buf, tag, blob.data(),
static_cast<uint32_t
>(blob.size()));
323inline bool read_tlv(
const uint8_t* buf,
size_t buf_size,
325 if (offset + 8 > buf_size)
return false;
331 size_t remaining = buf_size - (offset + 8);
332 if (field.
length > remaining)
return false;
333 field.
data = buf + offset + 8;
334 offset += 8 + field.
length;
340 if (field.
length != 4)
return false;
347 if (field.
length != 1)
return false;
354 return std::string(
reinterpret_cast<const char*
>(field.
data), field.
length);
359 return std::vector<uint8_t>(field.
data, field.
data + field.
length);
393 using namespace detail::meta;
395 std::vector<uint8_t> buf;
399 append_tlv_i32(buf, TAG_KEY_MODE,
static_cast<int32_t
>(
key_mode));
403#ifdef SIGNET_PRODUCTION_MODE
406 throw std::runtime_error(
407 "KeyMode::INTERNAL is disabled in production builds "
408 "(SIGNET_PRODUCTION_MODE). Use EXTERNAL key management.");
410#ifndef SIGNET_SUPPRESS_INTERNAL_KEY_WARNING
411 static bool warned =
false;
413 fprintf(stderr,
"[SIGNET WARNING] KeyMode::INTERNAL stores encryption key in file metadata — NOT for production use\n");
420 append_tlv_str(buf, TAG_KEY_ID,
key_id);
431 const uint8_t* data,
size_t size) {
433 using namespace detail::meta;
435 if (size > MAX_METADATA_SIZE) {
437 "key metadata exceeds 1 MB limit (CWE-770)"};
442 bool found_mode =
false;
444 while (offset < size) {
446 if (!read_tlv(data, size, offset, field)) {
448 "EncryptionKeyMetadata: truncated TLV field"};
454 if (!tlv_to_i32(field, mode_val)) {
456 "EncryptionKeyMetadata: invalid key_mode field"};
458 if (mode_val < 0 || mode_val > 1) {
460 "EncryptionKeyMetadata: invalid KeyMode value"};
466 case TAG_KEY_MATERIAL:
470 meta.
key_id = tlv_to_str(field);
480 "EncryptionKeyMetadata: missing key_mode field"};
503 using namespace detail::meta;
505 std::vector<uint8_t> buf;
508 append_tlv_i32(buf, TAG_ALGORITHM,
static_cast<int32_t
>(
algorithm));
511 append_tlv_str(buf, TAG_AAD_PREFIX,
aad_prefix);
522 const uint8_t* data,
size_t size) {
524 using namespace detail::meta;
526 if (size > MAX_METADATA_SIZE) {
528 "key metadata exceeds 1 MB limit (CWE-770)"};
533 bool found_algo =
false;
535 while (offset < size) {
537 if (!read_tlv(data, size, offset, field)) {
539 "FileEncryptionProperties: truncated TLV field"};
543 case TAG_ALGORITHM: {
545 if (!tlv_to_i32(field, algo_val)) {
547 "FileEncryptionProperties: invalid algorithm field"};
549 if (algo_val < 0 || algo_val > 1) {
551 "FileEncryptionProperties: invalid EncryptionAlgorithm value"};
557 case TAG_FOOTER_ENCRYPTED: {
559 if (!tlv_to_u8(field, val)) {
561 "FileEncryptionProperties: invalid footer_encrypted field"};
577 "FileEncryptionProperties: missing algorithm field"};
620namespace detail::thrift_crypto {
633 const std::string& aad_prefix =
"") {
647 if (!aad_prefix.empty()) {
650 reinterpret_cast<const uint8_t*
>(aad_prefix.data()),
664 std::vector<uint8_t> key_blob;
666 key_blob.push_back(
static_cast<uint8_t
>(meta.
key_mode));
668 key_blob.push_back(0x01);
670 uint16_t klen =
static_cast<uint16_t
>(meta.
key_material.size());
671 key_blob.push_back(
static_cast<uint8_t
>(klen & 0xFF));
672 key_blob.push_back(
static_cast<uint8_t
>((klen >> 8) & 0xFF));
673 key_blob.insert(key_blob.end(),
676 if (!meta.
key_id.empty()) {
677 key_blob.push_back(0x02);
678 uint16_t idlen =
static_cast<uint16_t
>(meta.
key_id.size());
679 key_blob.push_back(
static_cast<uint8_t
>(idlen & 0xFF));
680 key_blob.push_back(
static_cast<uint8_t
>((idlen >> 8) & 0xFF));
681 key_blob.insert(key_blob.end(), meta.
key_id.begin(), meta.
key_id.end());
694 const uint8_t* data,
size_t size) {
698 "Thrift key metadata exceeds 1 MB limit"};
703 bool found_key_metadata =
false;
707 if (fh.is_stop())
break;
709 switch (fh.field_id) {
718 if (blob.empty())
break;
719 found_key_metadata =
true;
722 if (off < blob.size()) {
725 while (off < blob.size()) {
726 uint8_t marker = blob[off++];
727 if (marker == 0x01 && off + 2 <= blob.size()) {
729 uint16_t klen =
static_cast<uint16_t
>(blob[off])
730 | (
static_cast<uint16_t
>(blob[off + 1]) << 8);
732 if (off + klen <= blob.size()) {
733 meta.
key_material.assign(blob.begin() +
static_cast<ptrdiff_t
>(off),
734 blob.begin() +
static_cast<ptrdiff_t
>(off + klen));
737 }
else if (marker == 0x02 && off + 2 <= blob.size()) {
739 uint16_t idlen =
static_cast<uint16_t
>(blob[off])
740 | (
static_cast<uint16_t
>(blob[off + 1]) << 8);
742 if (off + idlen <= blob.size()) {
743 meta.
key_id = std::string(
744 reinterpret_cast<const char*
>(blob.data() + off), idlen);
759 if (!found_key_metadata) {
761 "Thrift ColumnCryptoMetaData: missing key_metadata field"};
788 reinterpret_cast<const uint8_t*
>(props.
aad_prefix.data()),
811 const uint8_t* data,
size_t size) {
815 "Thrift file properties exceeds 1 MB limit"};
820 bool found_algo =
false;
824 if (fh.is_stop())
break;
826 switch (fh.field_id) {
831 if (!inner_fh.is_stop()) {
832 if (inner_fh.field_id == 1) {
843 if (aes_fh.is_stop())
break;
844 if (aes_fh.field_id == 1 &&
848 reinterpret_cast<const char*
>(prefix_bin.data()),
859 if (ufh.is_stop())
break;
881 "Thrift FileCryptoMetaData: missing encryption_algorithm"};
949#if defined(SIGNET_REQUIRE_COMMERCIAL_LICENSE) && SIGNET_REQUIRE_COMMERCIAL_LICENSE
952 "INTERNAL key mode stores plaintext keys in file metadata — "
953 "not allowed in production builds (FIPS 140-3 §7.7). "
954 "Use EXTERNAL key mode with a KMS client."};
1028 const std::string& subject_id,
1029 const std::vector<uint8_t>& dek) {
1031 if (subject_id.empty()) {
1033 "CryptoShredder: subject_id must not be empty"};
1035 if (keys_.count(subject_id) > 0) {
1037 "CryptoShredder: subject '" + subject_id +
"' already registered"};
1039 keys_[subject_id] = dek;
1047 const std::string& subject_id)
const {
1049 auto it = keys_.find(subject_id);
1050 if (it == keys_.end()) {
1052 if (shredded_.count(subject_id) > 0) {
1054 "CryptoShredder: subject '" + subject_id +
1055 "' has been cryptographically erased (GDPR Art. 17)"};
1058 "CryptoShredder: subject '" + subject_id +
"' not found"};
1071 auto it = keys_.find(subject_id);
1072 if (it == keys_.end()) {
1074 "CryptoShredder: subject '" + subject_id +
"' not found"};
1078 volatile unsigned char* p =
1079 reinterpret_cast<volatile unsigned char*
>(it->second.data());
1080 for (
size_t i = 0; i < it->second.size(); ++i) p[i] = 0;
1083 shredded_.insert(subject_id);
1089 return shredded_.count(subject_id) > 0;
1099 std::unordered_map<std::string, std::vector<uint8_t>> keys_;
1100 std::unordered_set<std::string> shredded_;
Per-subject key store supporting cryptographic erasure.
expected< const std::vector< uint8_t > * > get_key(const std::string &subject_id) const
Retrieve a subject's DEK for encryption/decryption.
bool is_shredded(const std::string &subject_id) const
Check if a subject has been cryptographically erased.
size_t shredded_count() const
Number of shredded subjects.
expected< void > shred(const std::string &subject_id)
Cryptographically shred a subject's data by destroying their DEK.
size_t active_count() const
Number of active (non-shredded) subjects.
expected< void > register_subject(const std::string &subject_id, const std::vector< uint8_t > &dek)
Register a data subject's DEK.
Abstract KMS client interface for DEK/KEK key wrapping.
virtual expected< std::vector< uint8_t > > unwrap_key(const std::vector< uint8_t > &wrapped_dek, const std::string &master_key_id) const =0
Unwrap (decrypt) a wrapped DEK using the KEK identified by master_key_id.
virtual ~IKmsClient()=default
virtual expected< std::vector< uint8_t > > wrap_key(const std::vector< uint8_t > &dek, const std::string &master_key_id) const =0
Wrap (encrypt) a DEK under the KEK identified by master_key_id.
A lightweight result type that holds either a success value of type T or an Error.
Thrift Compact Protocol reader.
void begin_struct()
Push a new field-ID context for reading a nested struct.
void end_struct()
Pop the field-ID context after finishing a nested struct.
FieldHeader read_field_header()
Read a field header.
void skip_field(uint8_t thrift_type)
Skip a field without parsing its value.
std::vector< uint8_t > read_binary()
Read raw binary data (varint-length-prefixed bytes).
Thrift Compact Protocol writer.
void begin_struct()
Push a new field-ID context for a nested struct.
void end_struct()
Pop the field-ID context after finishing a nested struct.
void write_field_bool(int16_t field_id, bool val)
Write a bool field where the value is embedded in the field header's type nibble (1 = true, 2 = false).
const std::vector< uint8_t > & data() const
Returns a const reference to the underlying byte buffer.
void write_field(int16_t field_id, uint8_t thrift_type)
Write a field header.
void write_stop()
Write struct stop marker (0x00).
void write_binary(const uint8_t *data, size_t len)
Write raw binary data as varint-length-prefixed bytes.
Thrift Compact Protocol encoder and decoder for Parquet metadata serialization.
std::vector< uint8_t > serialize_file_properties(const FileEncryptionProperties &props)
Serialize FileEncryptionProperties to Thrift Compact Protocol.
expected< EncryptionKeyMetadata > deserialize_key_metadata(const uint8_t *data, size_t size)
Deserialize EncryptionKeyMetadata from Thrift Compact Protocol.
expected< FileEncryptionProperties > deserialize_file_properties(const uint8_t *data, size_t size)
Deserialize FileEncryptionProperties from Thrift Compact Protocol.
std::vector< uint8_t > serialize_key_metadata(const EncryptionKeyMetadata &meta, EncryptionAlgorithm algo=EncryptionAlgorithm::AES_GCM_CTR_V1, const std::string &aad_prefix="")
Serialize EncryptionKeyMetadata to Thrift Compact Protocol.
KeyMode
How the encryption key is stored or referenced.
@ INTERNAL
Key material stored directly in file metadata (testing/dev).
@ EXTERNAL
Key referenced by KMS key ID; actual key resolved from KMS at runtime.
expected< void > validate_key_mode_for_production(KeyMode mode)
Check if INTERNAL key mode is allowed in the current build.
MetadataFormat
Wire format for key metadata serialization.
@ TLV
Signet v1 custom TLV format.
@ THRIFT
Parquet Thrift Compact Protocol (spec-compliant).
AlgorithmStatus
Algorithm lifecycle status per NIST SP 800-131A.
@ LEGACY
Only for processing existing data (no new encryption).
@ ACCEPTABLE
Approved for use.
@ DISALLOWED
Must not be used.
@ DEPRECATED
Still allowed but scheduled for removal.
EncryptionAlgorithm
Encryption algorithm identifier.
@ AES_GCM_CTR_V1
AES-256-GCM for footer, AES-256-CTR for column data (Parquet default).
@ AES_GCM_V1
AES-256-GCM for both footer and column data.
constexpr uint8_t STRUCT
Nested struct.
constexpr uint8_t BINARY
Length-prefixed bytes (also used for STRING).
constexpr uint8_t BOOL_TRUE
Boolean true (embedded in field header).
@ ENCRYPTION_ERROR
An encryption or decryption operation failed (bad key, tampered ciphertext, PME error).
@ INVALID_ARGUMENT
A caller-supplied argument is outside the valid range or violates a precondition.
Lightweight error value carrying an ErrorCode and a human-readable message.
Algorithm deprecation entry.
std::string transition_guidance
Migration guidance.
int32_t min_key_bits
Minimum key length in bits.
std::string algorithm_name
E.g. "AES-256-GCM", "AES-128-CTR", "3DES".
int64_t sunset_ns
Planned deprecation timestamp (0 = no sunset).
Specifies the encryption key for a single Parquet column.
std::string column_name
Parquet column path (e.g. "a.b.c").
std::string key_id
KMS key identifier (EXTERNAL mode).
std::vector< uint8_t > key
32-byte AES-256 key (INTERNAL mode).
Top-level configuration structure that drives FileEncryptor / FileDecryptor.
std::vector< uint8_t > default_column_key
Default column key (32 bytes).
std::string default_column_key_id
KMS key identifier for the default column key (EXTERNAL mode).
std::shared_ptr< IKmsClient > kms_client
Optional KMS client for DEK/KEK key wrapping (EXTERNAL key mode).
bool encrypt_footer
If true, the footer is encrypted.
std::vector< uint8_t > footer_key
32-byte AES-256 key for encrypting the Parquet footer (FileMetaData).
KeyMode key_mode
INTERNAL: keys stored in file metadata. EXTERNAL: KMS references only.
AadFormat
AAD construction format.
@ SPEC_BINARY
Parquet PME spec: fixed-width binary AAD.
@ LEGACY
Signet v1: null-separated string AAD.
std::string aad_prefix
AAD prefix – typically a file identifier or URI.
EncryptionAlgorithm algorithm
Encryption algorithm (GCM everywhere, or GCM-footer + CTR-columns).
std::vector< ColumnKeySpec > column_keys
Per-column key specifications. Columns listed here get their own key.
std::string footer_key_id
KMS key identifier for the footer key (EXTERNAL mode).
Stored in the Parquet FileMetaData.encryption_algorithm field.
std::string aad_prefix
AAD prefix bound into GCM auth tags.
EncryptionAlgorithm algorithm
Encryption algorithm.
static expected< FileEncryptionProperties > deserialize(const uint8_t *data, size_t size)
Deserialize from bytes.
std::vector< uint8_t > serialize() const
Serialize to bytes using TLV format.
bool footer_encrypted
Whether the footer itself is encrypted.
Key rotation request describing old → new key transition.
std::vector< uint8_t > new_key
Replacement key.
std::vector< uint8_t > old_key
Current (old) key.
std::string reason
Rotation reason: "scheduled", "compromised", "policy".
std::string key_id
Key being rotated.
int64_t requested_ns
Rotation request timestamp.
std::string key_id
Rotated key ID.
int64_t files_re_encrypted
Number of files re-encrypted.
int64_t completed_ns
Completion timestamp.
std::string error_message
Error message (if not successful).
bool success
Whether the rotation completed.