Signet Forge 0.1.0
C++20 Parquet library with AI-native extensions
DEMO
Loading...
Searching...
No Matches
audit_chain.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright 2026 Johnson Ogundeji
3// See LICENSE_COMMERCIAL for full terms.
4#pragma once
5
6#if !defined(SIGNET_ENABLE_COMMERCIAL) || !SIGNET_ENABLE_COMMERCIAL
7#error "signet/ai/audit_chain.hpp requires SIGNET_ENABLE_COMMERCIAL=ON (AGPL-3.0 commercial tier). See LICENSE_COMMERCIAL."
8#endif
9
10// ---------------------------------------------------------------------------
11// audit_chain.hpp -- Cryptographic hash chain for tamper-evident audit trails
12//
13// Provides a SHA-256-based hash chain that guarantees the integrity and
14// ordering of AI decision records stored in Parquet files. Each entry in
15// the chain commits to all previous entries through a cryptographic link,
16// making any retrospective tampering (insertion, deletion, reordering, or
17// modification of records) computationally detectable.
18//
19// Architecture:
20//
21// AuditChainWriter -- Builds hash chains during Parquet writes. Each
22// record is hashed and linked to its predecessor.
23//
24// AuditChainVerifier -- Verifies chain integrity during reads. Can
25// verify individual entries, entire chains, or
26// continuity across multiple Parquet files.
27//
28// AuditMetadata -- Stores chain summary in Parquet key-value
29// metadata for fast cross-file verification
30// without reading every entry.
31//
32// Hash chain algorithm:
33//
34// entry_hash = SHA-256( LE64(sequence_number)
35// || LE64(timestamp_ns)
36// || prev_hash[32]
37// || data_hash[32] )
38//
39// where prev_hash is all zeros for the first entry (or a user-supplied
40// continuation hash when chaining across files).
41//
42// Binary serialization (112 bytes per entry, little-endian):
43//
44// [0:8) sequence_number (int64_t LE)
45// [8:16) timestamp_ns (int64_t LE)
46// [16:48) prev_hash (32 bytes)
47// [48:80) data_hash (32 bytes)
48// [80:112) entry_hash (32 bytes)
49//
50// Chain serialization: 4-byte entry count (uint32_t LE) followed by
51// N entries of 112 bytes each.
52//
// Header-only. No external dependencies beyond the project's own SHA-256
// implementation (crypto::detail::sha256::sha256, defined in sha256.hpp).
55//
56// Part of SignetStack Signet Forge -- Phase 7: AI Decision Audit Trail.
57// ---------------------------------------------------------------------------
58
60#include "signet/error.hpp"
61#include "signet/types.hpp"
62
63#include <array>
64#include <atomic>
65#include <chrono>
66#include <cstdint>
67#include <cstring>
68#include <iomanip>
69#include <sstream>
70#include <stdexcept>
71#include <string>
72#include <vector>
73
74namespace signet::forge {
75
76// ===========================================================================
77// Forward declarations
78// ===========================================================================
79struct HashChainEntry;
80class AuditChainWriter;
81class AuditChainVerifier;
82struct AuditMetadata;
83
84// ===========================================================================
85// Constants
86// ===========================================================================
87
89inline constexpr size_t HASH_CHAIN_ENTRY_SIZE = 112;
90
91// ===========================================================================
92// Utility functions
93// ===========================================================================
94
/// Return the current wall-clock time as nanoseconds since the
/// std::chrono::system_clock epoch, adjusted so that every call in this
/// process returns a value strictly greater than the previous one.
///
/// A function-local atomic remembers the last value handed out. If the clock
/// reading is not ahead of it (same tick, or the clock stepped backwards), the
/// result becomes last + 1. The compare-exchange loop lets concurrent callers
/// each publish a distinct value without a mutex.
///
/// @return Strictly increasing nanosecond timestamp.
inline int64_t now_ns() {
    static std::atomic<int64_t> last_ns{0};

    // R-5: system_clock provides UTC traceability per MiFID II RTS 25 Art.2.
    const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
    int64_t candidate =
        std::chrono::duration_cast<std::chrono::nanoseconds>(since_epoch).count();

    // CWE-362: lock-free publish. On a failed exchange `observed` is refreshed
    // with the value another caller stored, and the candidate is bumped past it
    // before retrying.
    int64_t observed = last_ns.load(std::memory_order_acquire);
    do {
        if (candidate <= observed) {
            candidate = observed + 1;
        }
    } while (!last_ns.compare_exchange_weak(observed, candidate,
                                            std::memory_order_release,
                                            std::memory_order_acquire));
    return candidate;
}
125
// Clock synchronization status fields (the enclosing struct's header is not
// part of this listing). All fields default to "not synchronized / zero".

// Whether the clock is synced to NTP/PTP.
132 bool is_synchronized{false};
// NTP stratum (1 = primary, 2-15 = secondary).
133 int stratum{0};
// Estimated offset from UTC (absolute value, nanoseconds).
134 int64_t offset_ns{0};
// Maximum estimated error in nanoseconds; compared against the RTS 25
// thresholds by meets_rts25_hft() / meets_rts25_standard().
135 int64_t max_error_ns{0};
// NTP/PTP server address.
136 std::string sync_source;
// Timestamp of the last sync check.
// NOTE(review): unit presumably nanoseconds (name suffix) - confirm.
137 int64_t last_check_ns{0};
138
140 [[nodiscard]] bool meets_rts25_hft() const {
141 return is_synchronized && max_error_ns <= 100'000;
142 }
144 [[nodiscard]] bool meets_rts25_standard() const {
145 return is_synchronized && max_error_ns <= 1'000'000;
146 }
147};
148
/// Render a 32-byte hash as a 64-character lowercase hexadecimal string.
///
/// @param hash  The 32 bytes to encode.
/// @return Two hex digits per byte, high nibble first, in array order.
inline std::string hash_to_hex(const std::array<uint8_t, 32>& hash) {
    static constexpr char kDigits[] = "0123456789abcdef";

    // Pre-size the output and fill it in place, two characters per byte.
    std::string hex(hash.size() * 2, '0');
    size_t out = 0;
    for (const uint8_t octet : hash) {
        hex[out++] = kDigits[octet >> 4];
        hex[out++] = kDigits[octet & 0x0F];
    }
    return hex;
}
160
// Parse a 64-character hexadecimal string into a 32-byte hash.
//
// Both lowercase and uppercase digits (a-f / A-F) are accepted. The input must
// be exactly 64 characters; any other length, or any non-hex character, takes
// an error path whose message is built below.
//
// NOTE(review): in this listing the opening of both error returns (the line
// that starts the returned error object, before the message string) is
// missing after the two `if (...) {` lines. The error code used cannot be
// determined from here - restore from the original file.
164inline expected<std::array<uint8_t, 32>> hex_to_hash(const std::string& hex) {
165 if (hex.size() != 64) {
167 "hex_to_hash: expected 64 hex characters, got "
168 + std::to_string(hex.size())};
169 }
170
171 std::array<uint8_t, 32> hash{};
172 for (size_t i = 0; i < 32; ++i) {
// Each output byte comes from one pair of characters: high nibble, then low.
173 char hi = hex[i * 2];
174 char lo = hex[i * 2 + 1];
175
// Maps one hex digit to 0..15, or -1 for anything that is not a hex digit.
176 auto hex_val = [](char c) -> int {
177 if (c >= '0' && c <= '9') return c - '0';
178 if (c >= 'a' && c <= 'f') return 10 + (c - 'a');
179 if (c >= 'A' && c <= 'F') return 10 + (c - 'A');
180 return -1;
181 };
182
183 int hi_val = hex_val(hi);
184 int lo_val = hex_val(lo);
185
// The reported position is the index of the pair's first character (i * 2),
// even when the offending character is the second one of the pair.
186 if (hi_val < 0 || lo_val < 0) {
188 "hex_to_hash: invalid hex character at position "
189 + std::to_string(i * 2)};
190 }
191
192 hash[i] = static_cast<uint8_t>((hi_val << 4) | lo_val);
193 }
194
195 return hash;
196}
197
203inline std::string generate_chain_id() {
204 int64_t ts = now_ns();
205 std::ostringstream oss;
206 oss << "chain-"
207 << std::hex << std::setfill('0') << std::setw(16)
208 << static_cast<uint64_t>(ts);
209 return oss.str();
210}
211
212// ===========================================================================
213// Internal helpers (detail namespace)
214// ===========================================================================
namespace detail::audit {

/// Write an int64_t to dst[0..8) as little-endian bytes (least significant
/// byte first). The value's two's-complement bit pattern is stored unchanged.
inline void write_le64(uint8_t* dst, int64_t value) {
    uint64_t bits = static_cast<uint64_t>(value);
    for (int i = 0; i < 8; ++i) {
        dst[i] = static_cast<uint8_t>(bits & 0xFFu);
        bits >>= 8;
    }
}

/// Read an int64_t from 8 little-endian bytes at src[0..8).
inline int64_t read_le64(const uint8_t* src) {
    uint64_t bits = 0;
    // Fold from the most significant byte (src[7]) down to src[0].
    for (int i = 7; i >= 0; --i) {
        bits = (bits << 8) | static_cast<uint64_t>(src[i]);
    }
    return static_cast<int64_t>(bits);
}

/// Write a uint32_t to dst[0..4) as little-endian bytes.
inline void write_le32(uint8_t* dst, uint32_t value) {
    for (int i = 0; i < 4; ++i) {
        dst[i] = static_cast<uint8_t>(value & 0xFFu);
        value >>= 8;
    }
}

/// Read a uint32_t from 4 little-endian bytes at src[0..4).
inline uint32_t read_le32(const uint8_t* src) {
    uint32_t bits = 0;
    for (int i = 3; i >= 0; --i) {
        bits = (bits << 8) | static_cast<uint32_t>(src[i]);
    }
    return bits;
}

} // namespace detail::audit
260
// Fields of one hash-chain link (the enclosing struct's header is not part of
// this listing). All fields are value-initialized to zero.

// 0-indexed position in the chain, monotonically increasing.
276 int64_t sequence_number = 0;
277
// Nanoseconds since the Unix epoch when this entry was created.
279 int64_t timestamp_ns = 0;
280
// SHA-256 hash of the previous entry (all zeros for the first entry, or a
// user-supplied continuation hash when chaining across files).
283 std::array<uint8_t, 32> prev_hash{};
284
// SHA-256 hash of the record/row data that this entry covers.
286 std::array<uint8_t, 32> data_hash{};
287
// SHA-256 commitment over (sequence_number, timestamp_ns, prev_hash, data_hash).
291 std::array<uint8_t, 32> entry_hash{};
292
300 inline void compute_entry_hash() {
301 // Build the preimage: 8 + 8 + 32 + 32 = 80 bytes
302 uint8_t preimage[80];
303
306 std::memcpy(preimage + 16, prev_hash.data(), 32);
307 std::memcpy(preimage + 48, data_hash.data(), 32);
308
309 entry_hash = crypto::detail::sha256::sha256(preimage, sizeof(preimage));
310 }
311
318 [[nodiscard]] inline bool verify() const {
319 uint8_t preimage[80];
320
323 std::memcpy(preimage + 16, prev_hash.data(), 32);
324 std::memcpy(preimage + 48, data_hash.data(), 32);
325
326 auto expected = crypto::detail::sha256::sha256(preimage, sizeof(preimage));
327
328 // Constant-time comparison
329 uint8_t diff = 0;
330 for (size_t i = 0; i < 32; ++i) {
331 diff |= entry_hash[i] ^ expected[i];
332 }
333
334 return diff == 0;
335 }
336
348 [[nodiscard]] inline std::vector<uint8_t> serialize() const {
349 std::vector<uint8_t> buf(HASH_CHAIN_ENTRY_SIZE);
350
353 std::memcpy(buf.data() + 16, prev_hash.data(), 32);
354 std::memcpy(buf.data() + 48, data_hash.data(), 32);
355 std::memcpy(buf.data() + 80, entry_hash.data(), 32);
356
357 return buf;
358 }
359
// Reconstruct a HashChainEntry from its serialized form.
//
// `data` must point to at least HASH_CHAIN_ENTRY_SIZE readable bytes; `size`
// is the number of bytes available. Bytes beyond the first
// HASH_CHAIN_ENTRY_SIZE are ignored. The stored entry_hash is copied as-is and
// is NOT checked here - call verify() on the result for that.
//
// NOTE(review): two lines appear to be missing from this listing:
//   * the opening of the error return inside the size check (error code
//     unknown from here), and
//   * the read of sequence_number from data[0..8) - as shown, the returned
//     entry keeps sequence_number == 0, although the documented layout puts
//     sequence_number at [0:8). Restore from the original file.
366 [[nodiscard]] static inline expected<HashChainEntry> deserialize(
367 const uint8_t* data, size_t size) {
368
369 if (size < HASH_CHAIN_ENTRY_SIZE) {
371 "HashChainEntry::deserialize: need "
372 + std::to_string(HASH_CHAIN_ENTRY_SIZE)
373 + " bytes, got " + std::to_string(size)};
374 }
375
376 HashChainEntry entry;
// Offsets follow the documented layout: timestamp at 8, then the three
// 32-byte hashes at 16, 48 and 80.
378 entry.timestamp_ns = detail::audit::read_le64(data + 8);
379 std::memcpy(entry.prev_hash.data(), data + 16, 32);
380 std::memcpy(entry.data_hash.data(), data + 48, 32);
381 std::memcpy(entry.entry_hash.data(), data + 80, 32);
382
383 return entry;
384 }
385};
386
409public:
414 : last_hash_{}
415 , next_seq_(0) {
// Starts an empty chain: last_hash_ is all zeros and the next sequence number
// is 0. (The constructor's signature line is not part of this listing.)
// NOTE(review): the value returned by commercial::require_feature() is
// explicitly discarded below. Confirm that the call enforces the commercial
// gate by itself (e.g. by throwing); if it only reports a status, a failed
// check is silently ignored here.
416 auto gate = commercial::require_feature("AuditChainWriter");
417 (void)gate;
418 }
419
430 inline HashChainEntry append(const uint8_t* record_data, size_t record_size,
431 int64_t timestamp_ns) {
432 HashChainEntry entry;
433 entry.sequence_number = next_seq_;
434 entry.timestamp_ns = timestamp_ns;
435 entry.prev_hash = last_hash_;
436 entry.data_hash = crypto::detail::sha256::sha256(record_data, record_size);
437
438 entry.compute_entry_hash();
439
440 last_hash_ = entry.entry_hash;
441 ++next_seq_;
442 entries_.push_back(entry);
443
444 return entry;
445 }
446
453 inline HashChainEntry append(const uint8_t* record_data, size_t record_size) {
454 return append(record_data, record_size, now_ns());
455 }
456
// Number of entries currently held by this writer (entries appended since
// construction or the last reset()). This is a count, not a sequence number:
// after reset() with a non-zero initial_seq the two differ.
458 [[nodiscard]] inline int64_t length() const {
459 return static_cast<int64_t>(entries_.size());
460 }
461
// Current chain head: the entry_hash of the most recently appended entry.
// Before any append it is the initial value - all zeros after construction,
// or the initial_prev_hash passed to reset().
465 [[nodiscard]] inline std::array<uint8_t, 32> last_hash() const {
466 return last_hash_;
467 }
468
// Read-only view of the stored entries, in append order. The reference
// refers to the writer's internal vector, which append() and reset() modify.
470 [[nodiscard]] inline const std::vector<HashChainEntry>& entries() const {
471 return entries_;
472 }
473
480 [[nodiscard]] inline std::vector<uint8_t> serialize_chain() const {
481 // CWE-190: Integer Overflow or Wraparound — guard against entry count
482 // exceeding uint32_t range before narrowing cast to the 4-byte header.
483 if (entries_.size() > UINT32_MAX) {
484 throw std::overflow_error("Audit chain too large to serialize");
485 }
486 auto count = static_cast<uint32_t>(entries_.size());
487 std::vector<uint8_t> buf;
488 buf.reserve(4 + static_cast<size_t>(count) * HASH_CHAIN_ENTRY_SIZE);
489
490 // Write entry count
491 uint8_t count_buf[4];
492 detail::audit::write_le32(count_buf, count);
493 buf.insert(buf.end(), count_buf, count_buf + 4);
494
495 // Write each entry
496 for (const auto& entry : entries_) {
497 auto entry_bytes = entry.serialize();
498 buf.insert(buf.end(), entry_bytes.begin(), entry_bytes.end());
499 }
500
501 return buf;
502 }
503
511 inline void reset(std::array<uint8_t, 32> initial_prev_hash = {}, int64_t initial_seq = 0) {
512 entries_.clear();
513 last_hash_ = initial_prev_hash;
514 next_seq_ = initial_seq;
515 }
516
517private:
518 std::vector<HashChainEntry> entries_;
519 std::array<uint8_t, 32> last_hash_;
520 int64_t next_seq_;
521};
522
532public:
// Outcome of a chain verification (the enclosing struct's header is not part
// of this listing). A default-constructed result reports failure.

// True if the entire chain passed all integrity checks.
536 bool valid = false;
537
// Number of entries verified before a failure was detected, or the total
// entry count when the chain is valid.
540 int64_t entries_checked = 0;
541
// Index of the first entry that failed verification, or -1 if none did.
544 int64_t first_bad_index = -1;
545
// Human-readable description of the failure; left empty on success.
548 std::string error_message;
549 };
550
559 [[nodiscard]] static inline VerificationResult verify(
560 const uint8_t* chain_data, size_t chain_size) {
561
562 VerificationResult result;
563
564 // Need at least the 4-byte count header
565 if (chain_size < 4) {
566 result.error_message = "chain data too small for header (need >= 4 bytes)";
567 return result;
568 }
569
570 uint32_t count = detail::audit::read_le32(chain_data);
571 size_t expected_size = 4 + static_cast<size_t>(count) * HASH_CHAIN_ENTRY_SIZE;
572
573 // CWE-400, CWE-789: Memory Allocation with Excessive Size Value —
574 // validate that count * ENTRY_SIZE fits in chain_size before reserve().
575 if (expected_size < 4 || chain_size < expected_size) {
576 result.error_message = "chain data truncated: expected "
577 + std::to_string(expected_size)
578 + " bytes for " + std::to_string(count)
579 + " entries, got " + std::to_string(chain_size);
580 return result;
581 }
582
583 // Deserialize all entries
584 std::vector<HashChainEntry> entries;
585 entries.reserve(count);
586
587 const uint8_t* ptr = chain_data + 4;
588 for (uint32_t i = 0; i < count; ++i) {
589 auto entry_result = HashChainEntry::deserialize(
591
592 if (!entry_result) {
593 result.error_message = "failed to deserialize entry "
594 + std::to_string(i) + ": "
595 + entry_result.error().message;
596 result.first_bad_index = static_cast<int64_t>(i);
597 return result;
598 }
599
600 entries.push_back(std::move(*entry_result));
602 }
603
604 return verify(entries);
605 }
606
618 [[nodiscard]] static inline VerificationResult verify(
619 const std::vector<HashChainEntry>& entries) {
620
621 VerificationResult result;
622
623 if (entries.empty()) {
624 result.valid = true;
625 result.entries_checked = 0;
626 result.first_bad_index = -1;
627 return result;
628 }
629
630 std::array<uint8_t, 32> expected_prev_hash{}; // zeros for first entry
631
632 for (size_t i = 0; i < entries.size(); ++i) {
633 const auto& entry = entries[i];
634
635 // Check 1: sequence number must match position
636 if (entry.sequence_number != static_cast<int64_t>(i)) {
637 result.valid = false; // defensive: CWE-705
638 result.entries_checked = static_cast<int64_t>(i);
639 result.first_bad_index = static_cast<int64_t>(i);
640 result.error_message = "entry " + std::to_string(i)
641 + ": expected sequence_number " + std::to_string(i)
642 + ", got " + std::to_string(entry.sequence_number);
643 return result;
644 }
645
646 // Check 2: prev_hash must match expected
647 if (entry.prev_hash != expected_prev_hash) {
648 result.valid = false; // defensive: MiFID II RTS 24 Art.4
649 result.entries_checked = static_cast<int64_t>(i);
650 result.first_bad_index = static_cast<int64_t>(i);
651 result.error_message = "entry " + std::to_string(i)
652 + ": prev_hash mismatch (chain link broken)";
653 return result;
654 }
655
656 // Check 3: entry_hash must be self-consistent
657 if (!entry.verify()) {
658 result.valid = false; // defensive: tamper detection
659 result.entries_checked = static_cast<int64_t>(i);
660 result.first_bad_index = static_cast<int64_t>(i);
661 result.error_message = "entry " + std::to_string(i)
662 + ": entry_hash does not match recomputed hash (data tampered)";
663 return result;
664 }
665
666 // Check 4: timestamp must be non-decreasing
667 if (i > 0 && entry.timestamp_ns < entries[i - 1].timestamp_ns) {
668 result.valid = false; // defensive: ordering violation
669 result.entries_checked = static_cast<int64_t>(i);
670 result.first_bad_index = static_cast<int64_t>(i);
671 result.error_message = "entry " + std::to_string(i)
672 + ": timestamp_ns (" + std::to_string(entry.timestamp_ns)
673 + ") < previous (" + std::to_string(entries[i - 1].timestamp_ns)
674 + ") — ordering violation";
675 return result;
676 }
677
678 // Advance the expected prev_hash for the next entry
679 expected_prev_hash = entry.entry_hash;
680 }
681
682 // All entries passed
683 result.valid = true;
684 result.entries_checked = static_cast<int64_t>(entries.size());
685 result.first_bad_index = -1;
686 return result;
687 }
688
698 [[nodiscard]] static inline bool verify_continuity(
699 const std::array<uint8_t, 32>& file1_last_hash,
700 const std::vector<HashChainEntry>& file2_entries) {
701
702 if (file2_entries.empty()) {
703 return true; // empty continuation is vacuously valid
704 }
705
706 return file2_entries[0].prev_hash == file1_last_hash;
707 }
708
721 [[nodiscard]] static inline bool verify_entry(
722 const HashChainEntry& entry,
723 const std::array<uint8_t, 32>& expected_prev_hash) {
724
725 if (entry.prev_hash != expected_prev_hash) {
726 return false;
727 }
728
729 return entry.verify();
730 }
731};
732
751 std::string chain_id;
752
754 int64_t start_sequence = 0;
755
757 int64_t end_sequence = 0;
758
761 std::array<uint8_t, 32> first_prev_hash{};
762
765 std::array<uint8_t, 32> last_entry_hash{};
766
769
774
776 std::string first_hash;
777
779 std::string last_hash;
780
782 std::string prev_file_hash;
783
785 int64_t record_count = 0;
786
788 std::string record_type;
790
795 [[nodiscard]] inline std::vector<std::pair<std::string, std::string>>
797 std::vector<std::pair<std::string, std::string>> kvs;
798 kvs.emplace_back("signetstack.audit.chain_id", chain_id);
799 kvs.emplace_back("signetstack.audit.first_seq", std::to_string(start_sequence));
800 kvs.emplace_back("signetstack.audit.last_seq", std::to_string(end_sequence));
801 kvs.emplace_back("signetstack.audit.first_hash", first_hash);
802 kvs.emplace_back("signetstack.audit.last_hash", last_hash);
803 kvs.emplace_back("signetstack.audit.prev_file_hash", prev_file_hash);
804 kvs.emplace_back("signetstack.audit.record_count", std::to_string(record_count));
805 if (!record_type.empty()) {
806 kvs.emplace_back("signetstack.audit.record_type", record_type);
807 }
808 return kvs;
809 }
810
822 [[nodiscard]] inline std::string serialize() const {
823 std::ostringstream oss;
824 oss << "chain_id=" << chain_id
825 << ";start_seq=" << start_sequence
826 << ";end_seq=" << end_sequence
827 << ";first_prev=" << hash_to_hex(first_prev_hash)
828 << ";last_hash=" << hash_to_hex(last_entry_hash)
829 << ";created_by=" << created_by;
830 return oss.str();
831 }
832
// Parse a metadata string produced by serialize() back into an AuditMetadata.
//
// The input is split on ';' into tokens and each token on its first '='.
// chain_id, start_seq, end_seq, first_prev and last_hash are required;
// created_by is optional and, when absent, meta.created_by keeps its default.
// Unknown keys are ignored, and when a key occurs more than once the first
// occurrence wins (see find_key).
//
// NOTE(review): in this listing the opening of every error return (the line
// that starts the returned error object, before each message string) is
// missing. The error code used cannot be determined from here - restore from
// the original file.
840 [[nodiscard]] static inline expected<AuditMetadata> deserialize(
841 const std::string& s) {
842
843 AuditMetadata meta;
844
845 // Parse semicolon-delimited key=value pairs into an ordered list of pairs
846 auto parse_pairs = [](const std::string& input)
847 -> std::vector<std::pair<std::string, std::string>> {
848
849 std::vector<std::pair<std::string, std::string>> pairs;
850 size_t pos = 0;
851
852 while (pos < input.size()) {
853 // Find the next semicolon (or end of string)
854 size_t semi = input.find(';', pos);
855 if (semi == std::string::npos) {
856 semi = input.size();
857 }
858
859 // Extract the key=value substring
860 std::string token = input.substr(pos, semi - pos);
861
862 // Split on first '='; tokens without '=' (including empty ones) are skipped
863 size_t eq = token.find('=');
864 if (eq != std::string::npos) {
865 std::string key = token.substr(0, eq);
866 std::string value = token.substr(eq + 1);
867 pairs.emplace_back(std::move(key), std::move(value));
868 }
869
870 pos = semi + 1;
871 }
872
873 return pairs;
874 };
875
876 auto pairs = parse_pairs(s);
877
878 // Helper to find a key: linear scan, returns the first match or nullptr
879 auto find_key = [&pairs](const std::string& key) -> const std::string* {
880 for (const auto& [k, v] : pairs) {
881 if (k == key) return &v;
882 }
883 return nullptr;
884 };
885
886 // chain_id (required)
887 const auto* chain_id_val = find_key("chain_id");
888 if (!chain_id_val) {
890 "AuditMetadata: missing required field 'chain_id'"};
891 }
892 meta.chain_id = *chain_id_val;
893
894 // start_seq (required)
895 const auto* start_val = find_key("start_seq");
896 if (!start_val) {
898 "AuditMetadata: missing required field 'start_seq'"};
899 }
// std::stoll throws on non-numeric or out-of-range input; it accepts leading
// whitespace and stops at the first non-digit, so trailing characters after a
// valid number are not rejected here.
900 try {
901 meta.start_sequence = std::stoll(*start_val);
902 } catch (const std::exception&) {
904 "AuditMetadata: invalid start_seq value: " + *start_val};
905 }
906
907 // end_seq (required)
908 const auto* end_val = find_key("end_seq");
909 if (!end_val) {
911 "AuditMetadata: missing required field 'end_seq'"};
912 }
913 try {
914 meta.end_sequence = std::stoll(*end_val);
915 } catch (const std::exception&) {
917 "AuditMetadata: invalid end_seq value: " + *end_val};
918 }
919
920 // first_prev (required, 64 hex chars)
921 const auto* first_prev_val = find_key("first_prev");
922 if (!first_prev_val) {
924 "AuditMetadata: missing required field 'first_prev'"};
925 }
926 auto first_prev_result = hex_to_hash(*first_prev_val);
927 if (!first_prev_result) {
929 "AuditMetadata: invalid first_prev hash: "
930 + first_prev_result.error().message};
931 }
932 meta.first_prev_hash = *first_prev_result;
933
934 // last_hash (required, 64 hex chars)
935 const auto* last_hash_val = find_key("last_hash");
936 if (!last_hash_val) {
938 "AuditMetadata: missing required field 'last_hash'"};
939 }
940 auto last_hash_result = hex_to_hash(*last_hash_val);
941 if (!last_hash_result) {
943 "AuditMetadata: invalid last_hash: "
944 + last_hash_result.error().message};
945 }
946 meta.last_entry_hash = *last_hash_result;
947
948 // created_by (optional, defaults to SIGNET_CREATED_BY)
949 const auto* created_val = find_key("created_by");
950 if (created_val) {
951 meta.created_by = *created_val;
952 }
953
954 return meta;
955 }
956};
957
968 const AuditChainWriter& writer,
969 const std::string& chain_id) {
// Builds an AuditMetadata summary from the writer's current entries: the
// sequence range comes from the first and last entry, first_prev_hash from
// the first entry's prev_hash and last_entry_hash from the last entry's
// entry_hash. A writer with no entries is rejected.
//
// NOTE(review): this listing is missing the function's signature line, the
// opening of the error return below (error code unknown from here), and one
// statement just before `return meta;`. The string/count fields used by
// to_key_values() (first_hash, last_hash, prev_file_hash, record_count) are
// not assigned in the visible code.
970
971 if (writer.length() == 0) {
973 "build_audit_metadata: writer has no entries"};
974 }
975
976 const auto& entries = writer.entries();
977
978 AuditMetadata meta;
979 meta.chain_id = chain_id;
980 meta.start_sequence = entries.front().sequence_number;
981 meta.end_sequence = entries.back().sequence_number;
982 meta.first_prev_hash = entries.front().prev_hash;
983 meta.last_entry_hash = entries.back().entry_hash;
985
986 return meta;
987}
988
999 const uint8_t* chain_data, size_t chain_size) {
// Deserializes a chain (4-byte little-endian entry count followed by
// fixed-size entries) and verifies it with AuditChainVerifier::verify().
// Returns the entries on success; a failed verification is reported as
// ErrorCode::HASH_CHAIN_BROKEN carrying the verifier's message.
//
// NOTE(review): this listing is missing the function's signature line and the
// opening of the first three error returns (error codes unknown from here).
1000
1001 // Deserialize
1002 if (chain_size < 4) {
1004 "chain data too small for header"};
1005 }
1006
1007 uint32_t count = detail::audit::read_le32(chain_data);
1008 size_t expected_size = 4 + static_cast<size_t>(count) * HASH_CHAIN_ENTRY_SIZE;
1009
1010 // CWE-400, CWE-789: bounds check before reserve() prevents excessive allocation.
// NOTE(review): where size_t is 32 bits the multiplication above can wrap to a
// value that is >= 4 and still <= chain_size, which this check would accept;
// comparing count against (chain_size - 4) / HASH_CHAIN_ENTRY_SIZE avoids that.
1011 if (expected_size < 4 || chain_size < expected_size) {
1013 "chain data truncated: expected "
1014 + std::to_string(expected_size) + " bytes, got "
1015 + std::to_string(chain_size)};
1016 }
1017
1018 std::vector<HashChainEntry> entries;
1019 entries.reserve(count);
1020
// Walk the payload one fixed-size record at a time.
1021 const uint8_t* ptr = chain_data + 4;
1022 for (uint32_t i = 0; i < count; ++i) {
1023 auto entry_result = HashChainEntry::deserialize(ptr, HASH_CHAIN_ENTRY_SIZE);
1024 if (!entry_result) {
1026 "entry " + std::to_string(i) + ": "
1027 + entry_result.error().message};
1028 }
1029 entries.push_back(std::move(*entry_result));
1030 ptr += HASH_CHAIN_ENTRY_SIZE;
1031 }
1032
1033 // Verify
1034 auto result = AuditChainVerifier::verify(entries);
1035 if (!result.valid) {
1036 return Error{ErrorCode::HASH_CHAIN_BROKEN, result.error_message};
1037 }
1038
1039 return entries;
1040}
1041
1042} // namespace signet::forge
Verifies hash chain integrity.
static VerificationResult verify(const uint8_t *chain_data, size_t chain_size)
Verify a chain from serialized bytes.
static bool verify_entry(const HashChainEntry &entry, const std::array< uint8_t, 32 > &expected_prev_hash)
Verify a single entry against an expected prev_hash.
static bool verify_continuity(const std::array< uint8_t, 32 > &file1_last_hash, const std::vector< HashChainEntry > &file2_entries)
Check that two chain segments link together across files.
static VerificationResult verify(const std::vector< HashChainEntry > &entries)
Verify a vector of HashChainEntry objects.
Builds SHA-256 hash chains during Parquet writes.
const std::vector< HashChainEntry > & entries() const
Return a const reference to the internal entry list.
int64_t length() const
Return the number of entries in the chain.
std::vector< uint8_t > serialize_chain() const
Serialize the entire chain to bytes.
AuditChainWriter()
Construct a new writer with an empty chain.
HashChainEntry append(const uint8_t *record_data, size_t record_size, int64_t timestamp_ns)
Append a record to the chain with an explicit timestamp.
void reset(std::array< uint8_t, 32 > initial_prev_hash={}, int64_t initial_seq=0)
Clear the chain and optionally set an initial prev_hash.
std::array< uint8_t, 32 > last_hash() const
Return the entry_hash of the last entry in the chain.
HashChainEntry append(const uint8_t *record_data, size_t record_size)
Append a record with auto-generated timestamp from the system clock.
A lightweight result type that holds either a success value of type T or an Error.
Definition error.hpp:145
std::array< uint8_t, 32 > sha256(const uint8_t *data, size_t size)
Compute SHA-256 hash of arbitrary-length input.
Definition sha256.hpp:165
int64_t read_le64(const uint8_t *src)
Read an int64_t from 8 little-endian bytes.
void write_le64(uint8_t *dst, int64_t value)
Write an int64_t as 8 little-endian bytes to the output buffer.
uint32_t read_le32(const uint8_t *src)
Read a uint32_t from 4 little-endian bytes.
void write_le32(uint8_t *dst, uint32_t value)
Write a uint32_t as 4 little-endian bytes.
constexpr const char * SIGNET_CREATED_BY
Default "created_by" string embedded in every Parquet footer.
Definition types.hpp:203
int64_t now_ns()
Return the current time as nanoseconds since the Unix epoch (UTC).
expected< std::array< uint8_t, 32 > > hex_to_hash(const std::string &hex)
Convert a 64-character lowercase hex string back to a 32-byte hash.
expected< AuditMetadata > build_audit_metadata(const AuditChainWriter &writer, const std::string &chain_id)
Build an AuditMetadata from a populated AuditChainWriter.
constexpr size_t HASH_CHAIN_ENTRY_SIZE
Chain summary stored in Parquet key-value metadata.
expected< std::vector< HashChainEntry > > deserialize_and_verify_chain(const uint8_t *chain_data, size_t chain_size)
Deserialize and verify a chain from serialized bytes in one call.
std::string hash_to_hex(const std::array< uint8_t, 32 > &hash)
Convert a 32-byte SHA-256 hash to a lowercase hexadecimal string (64 chars).
@ HASH_CHAIN_BROKEN
The cryptographic audit hash chain is broken, indicating data tampering.
@ INVALID_FILE
The file is not a valid Parquet file (e.g. missing or wrong magic bytes).
@ INTERNAL_ERROR
An unexpected internal error that does not fit any other category.
@ CORRUPT_PAGE
A data page failed integrity checks (bad CRC, truncated, or exceeds size limits).
std::string generate_chain_id()
Generate a simple chain identifier based on the current timestamp.
SHA-256 hash function (NIST FIPS 180-4).
bool valid
True if the entire chain passed all integrity checks.
int64_t first_bad_index
Index of the first entry that failed verification, or -1 if all entries are valid.
int64_t entries_checked
Number of entries that were successfully verified before a failure was detected (or the total count i...
std::string error_message
Human-readable description of the verification outcome.
Chain summary stored in Parquet key-value metadata.
static expected< AuditMetadata > deserialize(const std::string &s)
Parse a metadata string back into AuditMetadata.
int64_t end_sequence
Sequence number of the last entry in this file's chain segment.
std::array< uint8_t, 32 > last_entry_hash
entry_hash of the last entry in this segment.
int64_t record_count
Number of audit records in this segment.
std::string last_hash
Hex string of the last entry's entry_hash in this segment.
std::string serialize() const
Convert to a semicolon-delimited metadata string.
std::string created_by
Creator string (e.g. "SignetStack signet-forge version 0.1.0").
std::string chain_id
Unique identifier for this chain (generated by generate_chain_id()).
std::vector< std::pair< std::string, std::string > > to_key_values() const
Export as Parquet file key-value metadata pairs.
std::string record_type
Record type: "decision", "inference", etc.
std::string first_hash
Hex string of the first entry's entry_hash in this segment.
std::array< uint8_t, 32 > first_prev_hash
prev_hash of the first entry in this segment (links to the prior file).
std::string prev_file_hash
Hex string of the first entry's prev_hash (links to the prior file).
int64_t start_sequence
Sequence number of the first entry in this file's chain segment.
NTP/PTP clock synchronization status for MiFID II RTS 25 Art.3.
int stratum
NTP stratum (1=primary, 2-15=secondary).
int64_t max_error_ns
Maximum estimated error (ns).
bool meets_rts25_standard() const
MiFID II RTS 25 Art.2: Non-HFT max divergence 1ms.
bool meets_rts25_hft() const
MiFID II RTS 25 Art.2: HFT gateway max divergence 100μs.
std::string sync_source
NTP/PTP server address.
bool is_synchronized
Whether clock is synced to NTP/PTP.
int64_t offset_ns
Estimated offset from UTC (absolute, ns).
int64_t last_check_ns
Timestamp of last sync check.
Lightweight error value carrying an ErrorCode and a human-readable message.
Definition error.hpp:101
std::string message
A human-readable description of what went wrong (may be empty for OK).
Definition error.hpp:105
A single link in the cryptographic hash chain.
int64_t sequence_number
0-indexed position in the chain, monotonically increasing.
void compute_entry_hash()
Derive entry_hash from the other fields.
std::array< uint8_t, 32 > entry_hash
SHA-256 commitment over (sequence_number, timestamp_ns, prev_hash, data_hash).
std::vector< uint8_t > serialize() const
Serialize this entry as 112 little-endian bytes.
bool verify() const
Check that entry_hash is consistent with the other fields.
static expected< HashChainEntry > deserialize(const uint8_t *data, size_t size)
Reconstruct a HashChainEntry from 112 bytes.
int64_t timestamp_ns
Nanoseconds since Unix epoch when this entry was created.
std::array< uint8_t, 32 > prev_hash
SHA-256 hash of the previous entry (all zeros for the first entry, or a user-supplied continuation ha...
std::array< uint8_t, 32 > data_hash
SHA-256 hash of the record/row data that this entry covers.
Parquet format enumerations, type traits, and statistics structs.