6#if !defined(SIGNET_ENABLE_COMMERCIAL) || !SIGNET_ENABLE_COMMERCIAL
7#error "signet/ai/audit_chain.hpp requires SIGNET_ENABLE_COMMERCIAL=ON (AGPL-3.0 commercial tier). See LICENSE_COMMERCIAL."
80class AuditChainWriter;
81class AuditChainVerifier;
111 static std::atomic<int64_t> last_ns{0};
115 auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
116 std::chrono::system_clock::now().time_since_epoch()).count();
117 int64_t
expected = last_ns.load(std::memory_order_acquire);
119 while (!last_ns.compare_exchange_weak(
expected, ns,
120 std::memory_order_release, std::memory_order_acquire)) {
150inline std::string
hash_to_hex(
const std::array<uint8_t, 32>& hash) {
151 static constexpr char hex_chars[] =
"0123456789abcdef";
154 for (uint8_t
byte : hash) {
155 result.push_back(hex_chars[(
byte >> 4) & 0x0F]);
156 result.push_back(hex_chars[
byte & 0x0F]);
165 if (hex.size() != 64) {
167 "hex_to_hash: expected 64 hex characters, got "
168 + std::to_string(hex.size())};
171 std::array<uint8_t, 32> hash{};
172 for (
size_t i = 0; i < 32; ++i) {
173 char hi = hex[i * 2];
174 char lo = hex[i * 2 + 1];
176 auto hex_val = [](
char c) ->
int {
177 if (c >=
'0' && c <=
'9')
return c -
'0';
178 if (c >=
'a' && c <=
'f')
return 10 + (c -
'a');
179 if (c >=
'A' && c <=
'F')
return 10 + (c -
'A');
183 int hi_val = hex_val(hi);
184 int lo_val = hex_val(lo);
186 if (hi_val < 0 || lo_val < 0) {
188 "hex_to_hash: invalid hex character at position "
189 + std::to_string(i * 2)};
192 hash[i] =
static_cast<uint8_t
>((hi_val << 4) | lo_val);
205 std::ostringstream oss;
207 << std::hex << std::setfill(
'0') << std::setw(16)
208 <<
static_cast<uint64_t
>(ts);
215namespace detail::audit {
219 auto v =
static_cast<uint64_t
>(value);
220 dst[0] =
static_cast<uint8_t
>(v);
221 dst[1] =
static_cast<uint8_t
>(v >> 8);
222 dst[2] =
static_cast<uint8_t
>(v >> 16);
223 dst[3] =
static_cast<uint8_t
>(v >> 24);
224 dst[4] =
static_cast<uint8_t
>(v >> 32);
225 dst[5] =
static_cast<uint8_t
>(v >> 40);
226 dst[6] =
static_cast<uint8_t
>(v >> 48);
227 dst[7] =
static_cast<uint8_t
>(v >> 56);
232 uint64_t v =
static_cast<uint64_t
>(src[0])
233 | (
static_cast<uint64_t
>(src[1]) << 8)
234 | (
static_cast<uint64_t
>(src[2]) << 16)
235 | (
static_cast<uint64_t
>(src[3]) << 24)
236 | (
static_cast<uint64_t
>(src[4]) << 32)
237 | (
static_cast<uint64_t
>(src[5]) << 40)
238 | (
static_cast<uint64_t
>(src[6]) << 48)
239 | (
static_cast<uint64_t
>(src[7]) << 56);
240 return static_cast<int64_t
>(v);
245 dst[0] =
static_cast<uint8_t
>(value);
246 dst[1] =
static_cast<uint8_t
>(value >> 8);
247 dst[2] =
static_cast<uint8_t
>(value >> 16);
248 dst[3] =
static_cast<uint8_t
>(value >> 24);
253 return static_cast<uint32_t
>(src[0])
254 | (
static_cast<uint32_t
>(src[1]) << 8)
255 | (
static_cast<uint32_t
>(src[2]) << 16)
256 | (
static_cast<uint32_t
>(src[3]) << 24);
302 uint8_t preimage[80];
306 std::memcpy(preimage + 16,
prev_hash.data(), 32);
307 std::memcpy(preimage + 48,
data_hash.data(), 32);
318 [[nodiscard]]
inline bool verify()
const {
319 uint8_t preimage[80];
323 std::memcpy(preimage + 16,
prev_hash.data(), 32);
324 std::memcpy(preimage + 48,
data_hash.data(), 32);
330 for (
size_t i = 0; i < 32; ++i) {
348 [[nodiscard]]
inline std::vector<uint8_t>
serialize()
const {
353 std::memcpy(buf.data() + 16,
prev_hash.data(), 32);
354 std::memcpy(buf.data() + 48,
data_hash.data(), 32);
355 std::memcpy(buf.data() + 80,
entry_hash.data(), 32);
367 const uint8_t* data,
size_t size) {
371 "HashChainEntry::deserialize: need "
373 +
" bytes, got " + std::to_string(size)};
379 std::memcpy(entry.
prev_hash.data(), data + 16, 32);
380 std::memcpy(entry.
data_hash.data(), data + 48, 32);
381 std::memcpy(entry.
entry_hash.data(), data + 80, 32);
416 auto gate = commercial::require_feature(
"AuditChainWriter");
431 int64_t timestamp_ns) {
442 entries_.push_back(entry);
458 [[nodiscard]]
inline int64_t
length()
const {
459 return static_cast<int64_t
>(entries_.size());
465 [[nodiscard]]
inline std::array<uint8_t, 32>
last_hash()
const {
470 [[nodiscard]]
inline const std::vector<HashChainEntry>&
entries()
const {
483 if (entries_.size() > UINT32_MAX) {
484 throw std::overflow_error(
"Audit chain too large to serialize");
486 auto count =
static_cast<uint32_t
>(entries_.size());
487 std::vector<uint8_t> buf;
491 uint8_t count_buf[4];
493 buf.insert(buf.end(), count_buf, count_buf + 4);
496 for (
const auto& entry : entries_) {
497 auto entry_bytes = entry.serialize();
498 buf.insert(buf.end(), entry_bytes.begin(), entry_bytes.end());
511 inline void reset(std::array<uint8_t, 32> initial_prev_hash = {}, int64_t initial_seq = 0) {
513 last_hash_ = initial_prev_hash;
514 next_seq_ = initial_seq;
518 std::vector<HashChainEntry> entries_;
519 std::array<uint8_t, 32> last_hash_;
560 const uint8_t* chain_data,
size_t chain_size) {
565 if (chain_size < 4) {
566 result.
error_message =
"chain data too small for header (need >= 4 bytes)";
575 if (expected_size < 4 || chain_size < expected_size) {
577 + std::to_string(expected_size)
578 +
" bytes for " + std::to_string(count)
579 +
" entries, got " + std::to_string(chain_size);
584 std::vector<HashChainEntry> entries;
585 entries.reserve(count);
587 const uint8_t* ptr = chain_data + 4;
588 for (uint32_t i = 0; i < count; ++i) {
594 + std::to_string(i) +
": "
595 + entry_result.error().message;
600 entries.push_back(std::move(*entry_result));
619 const std::vector<HashChainEntry>& entries) {
623 if (entries.empty()) {
630 std::array<uint8_t, 32> expected_prev_hash{};
632 for (
size_t i = 0; i < entries.size(); ++i) {
633 const auto& entry = entries[i];
636 if (entry.sequence_number !=
static_cast<int64_t
>(i)) {
637 result.
valid =
false;
641 +
": expected sequence_number " + std::to_string(i)
642 +
", got " + std::to_string(entry.sequence_number);
647 if (entry.prev_hash != expected_prev_hash) {
648 result.
valid =
false;
652 +
": prev_hash mismatch (chain link broken)";
657 if (!entry.verify()) {
658 result.
valid =
false;
662 +
": entry_hash does not match recomputed hash (data tampered)";
667 if (i > 0 && entry.timestamp_ns < entries[i - 1].timestamp_ns) {
668 result.
valid =
false;
672 +
": timestamp_ns (" + std::to_string(entry.timestamp_ns)
673 +
") < previous (" + std::to_string(entries[i - 1].timestamp_ns)
674 +
") — ordering violation";
679 expected_prev_hash = entry.entry_hash;
699 const std::array<uint8_t, 32>& file1_last_hash,
700 const std::vector<HashChainEntry>& file2_entries) {
702 if (file2_entries.empty()) {
706 return file2_entries[0].prev_hash == file1_last_hash;
723 const std::array<uint8_t, 32>& expected_prev_hash) {
725 if (entry.
prev_hash != expected_prev_hash) {
795 [[nodiscard]]
inline std::vector<std::pair<std::string, std::string>>
797 std::vector<std::pair<std::string, std::string>> kvs;
798 kvs.emplace_back(
"signetstack.audit.chain_id",
chain_id);
799 kvs.emplace_back(
"signetstack.audit.first_seq", std::to_string(
start_sequence));
800 kvs.emplace_back(
"signetstack.audit.last_seq", std::to_string(
end_sequence));
801 kvs.emplace_back(
"signetstack.audit.first_hash",
first_hash);
802 kvs.emplace_back(
"signetstack.audit.last_hash",
last_hash);
803 kvs.emplace_back(
"signetstack.audit.prev_file_hash",
prev_file_hash);
804 kvs.emplace_back(
"signetstack.audit.record_count", std::to_string(
record_count));
806 kvs.emplace_back(
"signetstack.audit.record_type",
record_type);
823 std::ostringstream oss;
841 const std::string& s) {
846 auto parse_pairs = [](
const std::string& input)
847 -> std::vector<std::pair<std::string, std::string>> {
849 std::vector<std::pair<std::string, std::string>> pairs;
852 while (pos < input.size()) {
854 size_t semi = input.find(
';', pos);
855 if (semi == std::string::npos) {
860 std::string token = input.substr(pos, semi - pos);
863 size_t eq = token.find(
'=');
864 if (eq != std::string::npos) {
865 std::string key = token.substr(0, eq);
866 std::string value = token.substr(eq + 1);
867 pairs.emplace_back(std::move(key), std::move(value));
876 auto pairs = parse_pairs(s);
879 auto find_key = [&pairs](
const std::string& key) ->
const std::string* {
880 for (
const auto& [k, v] : pairs) {
881 if (k == key)
return &v;
887 const auto* chain_id_val = find_key(
"chain_id");
890 "AuditMetadata: missing required field 'chain_id'"};
895 const auto* start_val = find_key(
"start_seq");
898 "AuditMetadata: missing required field 'start_seq'"};
902 }
catch (
const std::exception&) {
904 "AuditMetadata: invalid start_seq value: " + *start_val};
908 const auto* end_val = find_key(
"end_seq");
911 "AuditMetadata: missing required field 'end_seq'"};
915 }
catch (
const std::exception&) {
917 "AuditMetadata: invalid end_seq value: " + *end_val};
921 const auto* first_prev_val = find_key(
"first_prev");
922 if (!first_prev_val) {
924 "AuditMetadata: missing required field 'first_prev'"};
926 auto first_prev_result =
hex_to_hash(*first_prev_val);
927 if (!first_prev_result) {
929 "AuditMetadata: invalid first_prev hash: "
930 + first_prev_result.error().
message};
935 const auto* last_hash_val = find_key(
"last_hash");
936 if (!last_hash_val) {
938 "AuditMetadata: missing required field 'last_hash'"};
940 auto last_hash_result =
hex_to_hash(*last_hash_val);
941 if (!last_hash_result) {
943 "AuditMetadata: invalid last_hash: "
944 + last_hash_result.error().
message};
949 const auto* created_val = find_key(
"created_by");
969 const std::string& chain_id) {
971 if (writer.
length() == 0) {
973 "build_audit_metadata: writer has no entries"};
976 const auto& entries = writer.
entries();
999 const uint8_t* chain_data,
size_t chain_size) {
1002 if (chain_size < 4) {
1004 "chain data too small for header"};
1011 if (expected_size < 4 || chain_size < expected_size) {
1013 "chain data truncated: expected "
1014 + std::to_string(expected_size) +
" bytes, got "
1015 + std::to_string(chain_size)};
1018 std::vector<HashChainEntry> entries;
1019 entries.reserve(count);
1021 const uint8_t* ptr = chain_data + 4;
1022 for (uint32_t i = 0; i < count; ++i) {
1024 if (!entry_result) {
1026 "entry " + std::to_string(i) +
": "
1027 + entry_result.error().
message};
1029 entries.push_back(std::move(*entry_result));
1035 if (!result.valid) {
Verifies hash chain integrity.
static VerificationResult verify(const uint8_t *chain_data, size_t chain_size)
Verify a chain from serialized bytes.
static bool verify_entry(const HashChainEntry &entry, const std::array< uint8_t, 32 > &expected_prev_hash)
Verify a single entry against an expected prev_hash.
static bool verify_continuity(const std::array< uint8_t, 32 > &file1_last_hash, const std::vector< HashChainEntry > &file2_entries)
Check that two chain segments link together across files.
static VerificationResult verify(const std::vector< HashChainEntry > &entries)
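Verify a vector of HashChainEntry objects: each entry's sequence_number must equal its index, its prev_hash must equal the previous entry's entry_hash (all zeros for the first entry), its entry_hash must match the recomputed hash, and timestamp_ns must not decrease from one entry to the next.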
Builds SHA-256 hash chains during Parquet writes.
const std::vector< HashChainEntry > & entries() const
Return a const reference to the internal entry list.
int64_t length() const
Return the number of entries in the chain.
std::vector< uint8_t > serialize_chain() const
Serialize the entire chain to bytes.
AuditChainWriter()
Construct a new writer with an empty chain.
HashChainEntry append(const uint8_t *record_data, size_t record_size, int64_t timestamp_ns)
Append a record to the chain with an explicit timestamp.
void reset(std::array< uint8_t, 32 > initial_prev_hash={}, int64_t initial_seq=0)
Clear the chain and optionally set an initial prev_hash and an initial sequence number for the next entry.
std::array< uint8_t, 32 > last_hash() const
Return the entry_hash of the last entry in the chain.
HashChainEntry append(const uint8_t *record_data, size_t record_size)
Append a record with auto-generated timestamp from the system clock.
A lightweight result type that holds either a success value of type T or an Error.
std::array< uint8_t, 32 > sha256(const uint8_t *data, size_t size)
Compute SHA-256 hash of arbitrary-length input.
int64_t read_le64(const uint8_t *src)
Read an int64_t from 8 little-endian bytes.
void write_le64(uint8_t *dst, int64_t value)
Write an int64_t as 8 little-endian bytes to the output buffer.
uint32_t read_le32(const uint8_t *src)
Read a uint32_t from 4 little-endian bytes.
void write_le32(uint8_t *dst, uint32_t value)
Write a uint32_t as 4 little-endian bytes.
constexpr const char * SIGNET_CREATED_BY
Default "created_by" string embedded in every Parquet footer.
int64_t now_ns()
Return the current time as nanoseconds since the Unix epoch (UTC).
expected< std::array< uint8_t, 32 > > hex_to_hash(const std::string &hex)
Convert a 64-character hex string (lowercase or uppercase digits are both accepted) back to a 32-byte hash.
expected< AuditMetadata > build_audit_metadata(const AuditChainWriter &writer, const std::string &chain_id)
Build an AuditMetadata from a populated AuditChainWriter.
constexpr size_t HASH_CHAIN_ENTRY_SIZE
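Size in bytes of one serialized HashChainEntry (presumably 112, matching serialize()/deserialize()). The extracted description, "Chain summary stored in Parquet key-value metadata.", appears to describe AuditMetadata rather than this constant.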
expected< std::vector< HashChainEntry > > deserialize_and_verify_chain(const uint8_t *chain_data, size_t chain_size)
Deserialize and verify a chain from serialized bytes in one call.
std::string hash_to_hex(const std::array< uint8_t, 32 > &hash)
Convert a 32-byte SHA-256 hash to a lowercase hexadecimal string (64 chars).
@ HASH_CHAIN_BROKEN
The cryptographic audit hash chain is broken, indicating data tampering.
@ INVALID_FILE
The file is not a valid Parquet file (e.g. missing or wrong magic bytes).
@ INTERNAL_ERROR
An unexpected internal error that does not fit any other category.
@ CORRUPT_PAGE
A data page failed integrity checks (bad CRC, truncated, or exceeds size limits).
std::string generate_chain_id()
Generate a simple chain identifier based on the current timestamp.
SHA-256 hash function (NIST FIPS 180-4).
Result of a full chain verification.
bool valid
True if the entire chain passed all integrity checks.
int64_t first_bad_index
Index of the first entry that failed verification, or -1 if all entries are valid.
int64_t entries_checked
Number of entries that were successfully verified before a failure was detected (or the total count if no failure was detected).
std::string error_message
Human-readable description of the verification outcome.
NTP/PTP clock synchronization status for MiFID II RTS 25 Art.3.
int stratum
NTP stratum (1=primary, 2-15=secondary).
int64_t max_error_ns
Maximum estimated error (ns).
bool meets_rts25_standard() const
MiFID II RTS 25 Art.2: Non-HFT max divergence 1ms.
bool meets_rts25_hft() const
MiFID II RTS 25 Art.2: HFT gateway max divergence 100μs.
std::string sync_source
NTP/PTP server address.
bool is_synchronized
Whether clock is synced to NTP/PTP.
int64_t offset_ns
Estimated offset from UTC (absolute, ns).
int64_t last_check_ns
Timestamp of last sync check.
Lightweight error value carrying an ErrorCode and a human-readable message.
std::string message
A human-readable description of what went wrong (may be empty for OK).
A single link in the cryptographic hash chain.
int64_t sequence_number
0-indexed position in the chain, monotonically increasing.
void compute_entry_hash()
Derive entry_hash from the other fields.
std::array< uint8_t, 32 > entry_hash
SHA-256 commitment over (sequence_number, timestamp_ns, prev_hash, data_hash).
std::vector< uint8_t > serialize() const
Serialize this entry as 112 little-endian bytes.
bool verify() const
Check that entry_hash is consistent with the other fields.
static expected< HashChainEntry > deserialize(const uint8_t *data, size_t size)
Reconstruct a HashChainEntry from 112 bytes.
int64_t timestamp_ns
Nanoseconds since Unix epoch when this entry was created.
std::array< uint8_t, 32 > prev_hash
SHA-256 hash of the previous entry (all zeros for the first entry, or a user-supplied continuation hash).
std::array< uint8_t, 32 > data_hash
SHA-256 hash of the record/row data that this entry covers.
Parquet format enumerations, type traits, and statistics structs.