Signet Forge 0.1.1
C++20 Parquet library with AI-native extensions
DEMO
Loading...
Searching...
No Matches
pme_facade.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright 2026 Johnson Ogundeji
3#pragma once
4
29
30#if !defined(SIGNET_ENABLE_COMMERCIAL) || !SIGNET_ENABLE_COMMERCIAL
31#error "pme_facade.hpp requires SIGNET_ENABLE_COMMERCIAL=ON"
32#endif
33
#include "signet/crypto/pme.hpp"
#include "signet/error.hpp"

#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>
46
47namespace signet::forge::crypto {
48
49// =========================================================================
50// Column classification for PME
51// =========================================================================
52
/// Classification of a column for Parquet Modular Encryption.
///
/// The classification decides whether a column is encrypted and which
/// key-derivation context is used for it. The numeric values feed the HKDF
/// info string, so they must stay stable.
enum class ColumnClassification : std::uint8_t {
    REFERENCE  = 0,  ///< Unencrypted: public/reference data.
    FINANCIAL  = 1,  ///< Encrypted: financial data (prices, quantities).
    PII        = 2,  ///< Encrypted: personally identifiable information.
    HEALTH     = 3,  ///< Encrypted: health/biometric data (HIPAA).
    RESTRICTED = 4,  ///< Encrypted: general restricted data.
};
62
63// =========================================================================
64// KeyHandle — opaque, RAII, key-zeroing
65// =========================================================================
66
76class KeyHandle {
77public:
79 [[nodiscard]] static KeyHandle generate() {
80 KeyHandle h;
81 detail::cipher::fill_random_bytes(h.key_.data(), KEY_SIZE);
82 detail::secure_mem::lock_memory(h.key_.data(), KEY_SIZE);
83 h.valid_ = true;
84 return h;
85 }
86
90 [[nodiscard]] static KeyHandle from_bytes(const uint8_t* data, size_t size) {
91 if (size != KEY_SIZE) {
92 throw std::invalid_argument(
93 "KeyHandle requires exactly 32 bytes (AES-256), got " +
94 std::to_string(size));
95 }
96 KeyHandle h;
97 std::memcpy(h.key_.data(), data, KEY_SIZE);
98 detail::secure_mem::lock_memory(h.key_.data(), KEY_SIZE);
99 h.valid_ = true;
100 return h;
101 }
102
105 [[nodiscard]] static KeyHandle from_seed(const std::string& seed) {
106 KeyHandle h;
107 auto prk = hkdf_extract(nullptr, 0,
108 reinterpret_cast<const uint8_t*>(seed.data()), seed.size());
109 hkdf_expand(prk,
110 reinterpret_cast<const uint8_t*>("signet-pme-test-key"),
111 19, h.key_.data(), KEY_SIZE);
112 detail::secure_mem::lock_memory(h.key_.data(), KEY_SIZE);
113 h.valid_ = true;
114 return h;
115 }
116
118 if (valid_) {
119 detail::secure_mem::secure_zero(key_.data(), KEY_SIZE);
120 detail::secure_mem::unlock_memory(key_.data(), KEY_SIZE);
121 valid_ = false;
122 }
123 }
124
125 // Move-only (no copy — prevents key duplication)
126 KeyHandle(KeyHandle&& other) noexcept
127 : key_(other.key_), valid_(other.valid_) {
128 if (other.valid_) {
129 std::memset(other.key_.data(), 0, KEY_SIZE);
130 other.valid_ = false;
131 }
132 }
133 KeyHandle& operator=(KeyHandle&& other) noexcept {
134 if (this != &other) {
135 if (valid_) {
136 detail::secure_mem::secure_zero(key_.data(), KEY_SIZE);
137 detail::secure_mem::unlock_memory(key_.data(), KEY_SIZE);
138 }
139 key_ = other.key_;
140 valid_ = other.valid_;
141 if (other.valid_) {
142 std::memset(other.key_.data(), 0, KEY_SIZE);
143 other.valid_ = false;
144 }
145 }
146 return *this;
147 }
148 KeyHandle(const KeyHandle&) = delete;
149 KeyHandle& operator=(const KeyHandle&) = delete;
150
152 [[nodiscard]] bool is_valid() const noexcept { return valid_; }
153
155 static constexpr size_t key_size() noexcept { return KEY_SIZE; }
156
157 // -- Internal access (C++ only, NOT exposed to Python) --
158
160 [[nodiscard]] const uint8_t* data() const noexcept { return key_.data(); }
161
163 [[nodiscard]] std::vector<uint8_t> to_vector() const {
164 return std::vector<uint8_t>(key_.begin(), key_.end());
165 }
166
167private:
168 KeyHandle() : key_{}, valid_(false) {}
169 static constexpr size_t KEY_SIZE = 32;
170 std::array<uint8_t, KEY_SIZE> key_;
171 bool valid_;
172};
173
174// =========================================================================
175// EncryptedWriterOptions — high-level config that builds EncryptionConfig
176// =========================================================================
177
186
188 bool encrypt_footer = true;
189
191 std::string aad_prefix = "signet-forge-pme";
192
195 std::unordered_map<std::string, ColumnClassification> column_classes;
196
198 void classify(const std::string& column_name, ColumnClassification cls) {
199 column_classes[column_name] = cls;
200 }
201
207 [[nodiscard]] EncryptionConfig build_config(const KeyHandle& master_key) const {
208 if (!master_key.is_valid()) {
209 throw std::runtime_error("KeyHandle is not valid (moved or not initialised)");
210 }
211
212 EncryptionConfig config;
213 config.algorithm = algorithm;
215 config.footer_key = master_key.to_vector();
216 config.aad_prefix = aad_prefix;
218
219 // Derive per-column keys via HKDF
220 for (const auto& [col_name, cls] : column_classes) {
221 if (cls == ColumnClassification::REFERENCE) continue;
222
223 // HKDF info = "signet-pme-col:" + classification + ":" + column_name
224 std::string info = "signet-pme-col:" +
225 std::to_string(static_cast<int>(cls)) + ":" + col_name;
226
227 ColumnKeySpec spec;
228 spec.column_name = col_name;
229 spec.key.resize(32);
230
231 auto prk = hkdf_extract(nullptr, 0,
232 master_key.data(), KeyHandle::key_size());
233 (void)hkdf_expand(prk,
234 reinterpret_cast<const uint8_t*>(info.data()),
235 info.size(), spec.key.data(), 32);
236
237 config.column_keys.push_back(std::move(spec));
238 }
239
240 return config;
241 }
242};
243
244// =========================================================================
245// EncryptedReaderOptions — high-level config for decryption + RBAC
246// =========================================================================
247
256
258 bool encrypted_footer = true;
259
261 std::string aad_prefix = "signet-forge-pme";
262
264 std::unordered_map<std::string, ColumnClassification> column_classes;
265
269 std::vector<std::string> authorised_columns;
270
272 void classify(const std::string& column_name, ColumnClassification cls) {
273 column_classes[column_name] = cls;
274 }
275
277 void authorise(const std::string& column_name) {
278 authorised_columns.push_back(column_name);
279 }
280
282 [[nodiscard]] bool is_authorised(const std::string& column_name) const {
283 if (authorised_columns.empty()) return true; // empty = full access
284 for (const auto& name : authorised_columns) {
285 if (name == column_name) return true;
286 }
287 return false;
288 }
289
292 [[nodiscard]] EncryptionConfig build_config(const KeyHandle& master_key) const {
293 if (!master_key.is_valid()) {
294 throw std::runtime_error("KeyHandle is not valid");
295 }
296
297 EncryptionConfig config;
298 config.algorithm = algorithm;
300 config.footer_key = master_key.to_vector();
301 config.aad_prefix = aad_prefix;
303
304 for (const auto& [col_name, cls] : column_classes) {
305 if (cls == ColumnClassification::REFERENCE) continue;
306 if (!is_authorised(col_name)) continue; // RBAC: skip unauthorised
307
308 std::string info = "signet-pme-col:" +
309 std::to_string(static_cast<int>(cls)) + ":" + col_name;
310
311 ColumnKeySpec spec;
312 spec.column_name = col_name;
313 spec.key.resize(32);
314
315 auto prk = hkdf_extract(nullptr, 0,
316 master_key.data(), KeyHandle::key_size());
317 (void)hkdf_expand(prk,
318 reinterpret_cast<const uint8_t*>(info.data()),
319 info.size(), spec.key.data(), 32);
320
321 config.column_keys.push_back(std::move(spec));
322 }
323
324 return config;
325 }
326};
327
328} // namespace signet::forge::crypto
Abstract cipher interface, GCM/CTR adapters, CipherFactory, and platform CSPRNG.
Opaque, move-only handle to AES-256 key material held in a memory-locked 32-byte buffer that is securely zeroed on destruction.
KeyHandle(const KeyHandle &)=delete
static KeyHandle from_seed(const std::string &seed)
Construct from a deterministic seed (for benchmarking/testing ONLY).
KeyHandle & operator=(KeyHandle &&other) noexcept
bool is_valid() const noexcept
Check if the handle holds a valid key.
static KeyHandle generate()
Generate a new random AES-256 key via platform CSPRNG.
std::vector< uint8_t > to_vector() const
Get key as a vector (copies — use sparingly). For EncryptionConfig construction.
static KeyHandle from_bytes(const uint8_t *data, size_t size)
Construct from raw bytes (C++ internal use only — NOT exposed to Python).
static constexpr size_t key_size() noexcept
Key size in bytes (always 32 for AES-256).
const uint8_t * data() const noexcept
Access raw key bytes. MUST NOT be exposed through FFI.
KeyHandle(KeyHandle &&other) noexcept
KeyHandle & operator=(const KeyHandle &)=delete
HKDF key derivation (RFC 5869) using HMAC-SHA256.
Key material, encryption configuration, and TLV serialization for Parquet Modular Encryption (PME).
void fill_random_bytes(uint8_t *buf, size_t size)
Fill a buffer with cryptographically random bytes using the best available OS-level CSPRNG (CWE-338: ...
void secure_zero(void *ptr, size_t size)
Securely zero a memory region (not optimized out by the compiler).
bool lock_memory(void *ptr, size_t size)
Lock a memory region so it is not paged to swap.
void unlock_memory(void *ptr, size_t size)
Unlock a previously locked memory region.
ColumnClassification
Classification determines whether a column is encrypted and with which key derivation context.
@ RESTRICTED
Encrypted — general restricted data.
@ HEALTH
Encrypted — health/biometric data (HIPAA)
@ PII
Encrypted — personally identifiable information.
@ FINANCIAL
Encrypted — financial data (prices, quantities)
@ REFERENCE
Unencrypted — public/reference data.
std::array< uint8_t, 32 > hkdf_extract(const uint8_t *salt, size_t salt_size, const uint8_t *ikm, size_t ikm_size)
HKDF-Extract (RFC 5869 §2.2): Extract a pseudorandom key from input keying material.
Definition hkdf.hpp:107
bool hkdf_expand(const std::array< uint8_t, 32 > &prk, const uint8_t *info, size_t info_size, uint8_t *output, size_t output_size)
HKDF-Expand (RFC 5869 §2.3): Expand PRK to output keying material.
Definition hkdf.hpp:126
@ INTERNAL
Key material stored directly in file metadata (testing/dev).
EncryptionAlgorithm
Encryption algorithm identifier.
@ AES_GCM_CTR_V1
AES-256-GCM for footer, AES-256-CTR for column data (Parquet default).
Parquet Modular Encryption (PME) orchestrator – encrypts and decrypts Parquet file components (footer...
Specifies the encryption key for a single Parquet column.
std::string column_name
Parquet column path (e.g. "a.b.c").
std::vector< uint8_t > key
32-byte AES-256 key (INTERNAL mode).
High-level decryption options for ParquetReader.
std::string aad_prefix
AAD prefix (must match writer).
bool encrypted_footer
Whether the footer was encrypted (must match writer).
EncryptionAlgorithm algorithm
Algorithm must match the writer's algorithm.
std::vector< std::string > authorised_columns
Authorised columns for this reader.
std::unordered_map< std::string, ColumnClassification > column_classes
Column classifications (must match writer for correct key derivation).
EncryptionConfig build_config(const KeyHandle &master_key) const
Build the low-level EncryptionConfig for decryption.
void authorise(const std::string &column_name)
Authorise a column for reading.
void classify(const std::string &column_name, ColumnClassification cls)
Classify a column (must match writer classification for correct HKDF).
bool is_authorised(const std::string &column_name) const
Check if a column is authorised for this reader.
High-level encryption options for ParquetWriter.
std::unordered_map< std::string, ColumnClassification > column_classes
Column classifications.
std::string aad_prefix
AAD prefix — typically a file URI or tenant identifier.
EncryptionAlgorithm algorithm
Algorithm: GCM for both footer and columns (default), or GCM footer + CTR columns.
void classify(const std::string &column_name, ColumnClassification cls)
Classify a column for encryption.
bool encrypt_footer
Whether to encrypt the footer (true) or sign it with HMAC (false).
EncryptionConfig build_config(const KeyHandle &master_key) const
Build the low-level EncryptionConfig from this facade.
Top-level configuration structure that drives FileEncryptor / FileDecryptor.
bool encrypt_footer
If true, the footer is encrypted.
std::vector< uint8_t > footer_key
32-byte AES-256 key for encrypting the Parquet footer (FileMetaData).
KeyMode key_mode
INTERNAL: keys stored in file metadata. EXTERNAL: KMS references only.
std::string aad_prefix
AAD prefix – typically a file identifier or URI.
EncryptionAlgorithm algorithm
Encryption algorithm (GCM everywhere, or GCM-footer + CTR-columns).
std::vector< ColumnKeySpec > column_keys
Per-column key specifications. Columns listed here get their own key.