Signet Forge 0.1.0
C++20 Parquet library with AI-native extensions
DEMO
Loading...
Searching...
No Matches
row_lineage.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright 2026 Johnson Ogundeji
3// See LICENSE_COMMERCIAL for full terms.
4
9
10#pragma once
11
12#if !defined(SIGNET_ENABLE_COMMERCIAL) || !SIGNET_ENABLE_COMMERCIAL
13#error "signet/ai/row_lineage.hpp requires SIGNET_ENABLE_COMMERCIAL=ON (AGPL-3.0 commercial tier). See LICENSE_COMMERCIAL."
14#endif
15
16// ---------------------------------------------------------------------------
17// row_lineage.hpp -- Per-row lineage tracking (Iceberg V3-style)
18//
19// Generates monotonic row_id, tracks mutation version, origin file/batch,
20// and a per-row SHA-256 prev_hash chain for tamper-evident per-row ordering.
21//
22// This extends the batch-level audit chain (audit_chain.hpp) to provide
23// row-granularity lineage suitable for Iceberg V3 style data governance.
24// ---------------------------------------------------------------------------
25
26#include "signet/crypto/sha256.hpp" // crypto::detail::sha256::sha256
27#include "signet/error.hpp" // commercial::require_feature
28
#include <array>
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>
32
33namespace signet::forge {
34
47public:
/// Lineage metadata attached to a single row.
///
/// The integral fields carry zero initializers so that a default-initialized
/// `RowLineage` never exposes indeterminate values. The type remains an
/// aggregate, and `RowLineage{}` yields the same values as before.
struct RowLineage {
    int64_t row_id = 0;           ///< Row identifier assigned by the tracker (sequential, starting at 0).
    int32_t row_version = 0;      ///< Version value copied from the tracker when the row was recorded.
    std::string row_origin_file;  ///< Origin file/batch identifier copied from the tracker.
    std::string row_prev_hash;    ///< Hex chain hash as it stood before this row (all zeros for the first row).
};
55
61 explicit RowLineageTracker(const std::string& origin_file = "",
62 int32_t initial_version = 1)
63 : origin_file_(origin_file)
64 , version_(initial_version) {
65 // AGPL-3.0 commercial tier gating: the license check result is intentionally
66 // discarded here. In demo/dev mode (SIGNET_REQUIRE_COMMERCIAL_LICENSE=OFF),
67 // require_feature() always succeeds. In production mode, a missing license
68 // causes a hard error at a higher level (InferenceLogWriter /
69 // DecisionLogWriter constructor). Discarding here allows RowLineageTracker
70 // to be used standalone in tests without the full license validation pipeline.
71 auto gate = commercial::require_feature("RowLineageTracker");
72 (void)gate;
73 }
74
77 [[nodiscard]] RowLineage next(const uint8_t* row_data, size_t row_size) {
78 RowLineage lineage;
79 lineage.row_id = next_row_id_++;
80 lineage.row_version = version_;
81 lineage.row_origin_file = origin_file_;
82 lineage.row_prev_hash = prev_hash_hex_;
83
84 // Compute cumulative hash: SHA-256(prev_hash_hex + row_data)
85 // This creates a true chain where each row depends on all prior rows.
86 std::vector<uint8_t> chain_input;
87 chain_input.reserve(prev_hash_hex_.size() + row_size);
88 chain_input.insert(chain_input.end(), prev_hash_hex_.begin(), prev_hash_hex_.end());
89 chain_input.insert(chain_input.end(), row_data, row_data + row_size);
90 auto hash = crypto::detail::sha256::sha256(chain_input.data(), chain_input.size());
91 prev_hash_hex_ = hash_to_hex_impl(hash);
92
93 return lineage;
94 }
95
97 [[nodiscard]] int64_t current_row_id() const noexcept { return next_row_id_; }
98
106 void reset(const std::string& origin_file, int32_t version = 1) {
107 origin_file_ = origin_file;
108 version_ = version;
109 }
110
111private:
std::string origin_file_;                           // Origin identifier copied into each lineage record.
int32_t version_{1};                                // Version value copied into each lineage record.
int64_t next_row_id_{0};                            // Row id handed out by the next call to next().
std::string prev_hash_hex_ = std::string(64, '0');  // Chain head; genesis value is 64 '0' characters.
116
/// Render a 32-byte digest as a 64-character lowercase hexadecimal string.
///
/// Each byte yields two characters: high nibble first, then low nibble.
static std::string hash_to_hex_impl(const std::array<uint8_t, 32>& hash) {
    static constexpr char kDigits[] = "0123456789abcdef";

    std::string encoded;
    encoded.reserve(hash.size() * 2);
    for (const uint8_t byte : hash) {
        encoded.push_back(kDigits[byte >> 4]);
        encoded.push_back(kDigits[byte & 0x0F]);
    }
    return encoded;
}
127};
128
129} // namespace signet::forge
Per-row lineage tracking inspired by Iceberg V3-style data governance.
RowLineageTracker(const std::string &origin_file="", int32_t initial_version=1)
Construct a tracker for the given origin file and initial version.
int64_t current_row_id() const noexcept
Get current row counter (next row_id to be assigned).
RowLineage next(const uint8_t *row_data, size_t row_size)
Generate lineage for the next row.
void reset(const std::string &origin_file, int32_t version=1)
Reset the origin file and version for a new batch.
std::array< uint8_t, 32 > sha256(const uint8_t *data, size_t size)
Compute SHA-256 hash of arbitrary-length input.
Definition sha256.hpp:165
SHA-256 hash function (NIST FIPS 180-4).
Lineage metadata for a single row.
int32_t row_version
Mutation version counter.
std::string row_origin_file
Source file/batch identifier.
std::string row_prev_hash
SHA-256 hex of the cumulative hash chain as of the previous row, i.e. SHA-256(previous chain hash + previous row's data); 64 zeros for the first row.
int64_t row_id
Monotonic row identifier (0-based, never resets)