Signet Forge 0.1.0
C++20 Parquet library with AI-native extensions
DEMO
Loading...
Searching...
No Matches
data_classification.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright 2026 Johnson Ogundeji
3// See LICENSE_COMMERCIAL for full terms.
4#pragma once
5
6#if !defined(SIGNET_ENABLE_COMMERCIAL) || !SIGNET_ENABLE_COMMERCIAL
7#error "signet/ai/data_classification.hpp requires SIGNET_ENABLE_COMMERCIAL=ON (AGPL-3.0 commercial tier). See LICENSE_COMMERCIAL."
8#endif
9
10// ---------------------------------------------------------------------------
11// data_classification.hpp -- Formal Data Classification Ontology
12//
13// Gap G-9: Formal data classification ontology per DORA Art.8 + GDPR Art.32.
14//
15// Provides a structured, machine-readable data classification framework:
16// - DORA Art.8: ICT asset classification (data at rest, in transit)
17// - GDPR Art.9: Special categories of personal data
18// - GDPR Art.32: Appropriate security measures per classification
19// - NIST SP 800-60: Information types and security categorization
20//
21// Components:
22// - DataClassification: 4-tier confidentiality levels
23// - DataSensitivity: GDPR Art.9 special category types
24// - RegulatoryRegime: applicable regulatory frameworks
25// - DataClassificationRule: per-field classification + handling policy
26// - DataClassificationOntology: rule registry with validation
27//
28// Header-only. Part of the signet::forge AI module.
29// ---------------------------------------------------------------------------
30
31#include "signet/error.hpp"
32
33#include <algorithm>
34#include <cstdint>
35#include <string>
36#include <unordered_map>
37#include <vector>
38
39namespace signet::forge {
40
41// ---------------------------------------------------------------------------
42// Enumerations
43// ---------------------------------------------------------------------------
44
/// Data confidentiality level per DORA Art.8 + ISO 27001 Annex A.
///
/// Four tiers. PUBLIC is the least restrictive tier and is what lookup()
/// falls back to for fields that have no registered rule.
enum class DataClassification : int32_t {
    PUBLIC            = 0,  ///< No confidentiality requirement.
    INTERNAL          = 1,  ///< Business-internal, not for external sharing.
    RESTRICTED        = 2,  ///< Regulated data (GDPR, FCA, MiFID II).
    // NOTE(review): this enumerator was absent from the listing although
    // classification_name() switches on it; the value 3 continues the
    // sequence above — confirm against the original header.
    HIGHLY_RESTRICTED = 3   ///< Cryptographic keys, trading secrets, PII.
};
52
/// Data sensitivity per GDPR Art.9 special categories.
///
/// The numeric order is significant: validate_handling() treats every value
/// greater than or equal to PII as sensitive, so new enumerators must be
/// placed on the correct side of PII.
enum class DataSensitivity : int32_t {
    NEUTRAL       = 0,  ///< No special sensitivity.
    PSEUDONYMISED = 1,  ///< Identifiable only with additional key (Art.25).
    ANONYMISED    = 2,  ///< Irreversibly de-identified (Art.4(1)).
    PII           = 3,  ///< Personally Identifiable Information.
    FINANCIAL_PII = 4,  ///< Financial account data, trading activity.
    BIOMETRIC     = 5,  ///< Biometric data (Art.9 special category).
    HEALTH        = 6   ///< Health/genetic data (Art.9 special category).
};
63
/// Regulatory regime(s) applicable to the data.
enum class RegulatoryRegime : int32_t {
    NONE      = 0,  ///< No regime assigned.
    GDPR      = 1,  ///< EU General Data Protection Regulation.
    MIFID2    = 2,  ///< Markets in Financial Instruments Directive II.
    DORA      = 3,  ///< Digital Operational Resilience Act.
    EU_AI_ACT = 4,  ///< EU Artificial Intelligence Act.
    SOX       = 5,  ///< Sarbanes-Oxley Act.
    SEC_17A4  = 6,  ///< SEC Rule 17a-4 (records retention).
    PCI_DSS   = 7,  ///< Payment Card Industry Data Security Standard.
    HIPAA     = 8   ///< Health Insurance Portability and Accountability Act.
};
76
77// ---------------------------------------------------------------------------
78// DataClassificationRule
79// ---------------------------------------------------------------------------
80
// Per-field data classification and handling policy.
// NOTE(review): this listing does not show the opening `struct` line, nor the
// members `classification`, `sensitivity` and `allow_pseudonymisation` that
// other code in this file reads or writes — confirm against the original header.
84 std::string field_name; // Column/field path (e.g., "user.email", "price")
88
89 // --- Retention lifecycle ---
90 int64_t min_retention_ns = 0; // Minimum retention (0 = no min)
91 int64_t max_retention_ns = INT64_C(157788000000000000); // Max retention (default 5y)
92
93 // --- Processing restrictions ---
94 bool require_encryption = false; // RESTRICTED/HIGHLY_RESTRICTED → true; checked by validate_handling()
96 bool allow_aggregation = true;
97 bool allow_ml_training = true; // PII, secrets → false
98 bool allow_export = true; // HIGHLY_RESTRICTED → false
99 bool allow_logging = true; // Biometric, health → false in plaintext
100
101 // --- Purpose limitation (GDPR Art.5(1)(b)) ---
102 std::vector<std::string> allowed_purposes; // When empty, validate_handling() never reports a purpose violation
103};
104
105// ---------------------------------------------------------------------------
106// DataClassificationOntology
107// ---------------------------------------------------------------------------
108
114public:
116 explicit DataClassificationOntology(const std::string& ontology_id = "default")
117 : ontology_id_(ontology_id) {
118 (void)commercial::require_feature("DataClassificationOntology");
119 }
120
123 rules_[rule.field_name] = rule;
124 }
125
128 [[nodiscard]] DataClassificationRule lookup(const std::string& field_name) const {
129 auto it = rules_.find(field_name);
130 if (it != rules_.end()) return it->second;
131 // Unknown fields default to PUBLIC/NEUTRAL (least restrictive) —
132 // callers that need fail-closed semantics should register all fields
133 // explicitly or use validate_handling() with require_encryption=true.
135 dflt.field_name = field_name;
138 return dflt;
139 }
140
142 [[nodiscard]] std::vector<DataClassificationRule> all_rules() const {
143 std::vector<DataClassificationRule> out;
144 out.reserve(rules_.size());
145 for (const auto& [_, r] : rules_) out.push_back(r);
146 return out;
147 }
148
150 [[nodiscard]] size_t size() const { return rules_.size(); }
151
153 [[nodiscard]] const std::string& ontology_id() const { return ontology_id_; }
154
160 const std::string& field_name,
161 bool is_encrypted,
162 bool is_pseudonymised,
163 bool purpose_is_allowed = true) const
164 {
165 auto rule = lookup(field_name);
166
167 // HIGHLY_RESTRICTED or RESTRICTED fields must be encrypted
168 if (rule.require_encryption && !is_encrypted) {
170 "Data classification violation: field '" + field_name +
171 "' requires encryption (classification=" +
172 classification_name(rule.classification) + ")"};
173 }
174
175 // PII fields should be pseudonymised unless explicitly allowed
176 if (rule.sensitivity >= DataSensitivity::PII &&
177 !is_pseudonymised && !rule.allow_logging) {
179 "Data classification violation: field '" + field_name +
180 "' contains sensitive data and must be pseudonymised for logging"};
181 }
182
183 // Purpose limitation check
184 if (!purpose_is_allowed && !rule.allowed_purposes.empty()) {
186 "Data classification violation: field '" + field_name +
187 "' processing purpose not in allowed list (GDPR Art.5(1)(b))"};
188 }
189
190 return {};
191 }
192
// Build a default ontology (id "financial-default") with standard
// financial/compliance field rules and return it by value.
// NOTE(review): this listing is incomplete. The function's signature line, the
// remainder of the "timestamp" rule, the declaration of `r` in each scope and
// the lines between each `field_name` assignment and the next visible line
// are not shown — confirm against the original header before editing.
195 DataClassificationOntology ont("financial-default");
196
197 // Public data
200 ont.add_rule({"timestamp", DataClassification::PUBLIC,
202
203 // Internal market data
204 {
206 r.field_name = "price";
210 r.min_retention_ns = INT64_C(157788000000000000); // 5y MiFID II
211 ont.add_rule(r);
212 }
213 {
215 r.field_name = "volume";
// Same retention constant as "price".
219 r.min_retention_ns = INT64_C(157788000000000000);
220 ont.add_rule(r);
221 }
222
223 // Restricted trading data
224 {
226 r.field_name = "strategy_id";
// Must be encrypted and is excluded from ML training; same retention
// constant as "price".
230 r.require_encryption = true;
231 r.allow_ml_training = false;
232 r.min_retention_ns = INT64_C(157788000000000000);
233 ont.add_rule(r);
234 }
235
236 // Highly restricted PII
237 {
239 r.field_name = "trader_id";
// Must be encrypted; ML training, export and logging are all disabled.
243 r.require_encryption = true;
244 r.allow_ml_training = false;
245 r.allow_export = false;
246 r.allow_logging = false;
// A non-empty purpose list makes validate_handling() reject calls made with
// purpose_is_allowed == false.
247 r.allowed_purposes = {"compliance-reporting", "regulatory-inquiry"};
248 ont.add_rule(r);
249 }
250
251 // Cryptographic key material
252 {
254 r.field_name = "encryption_key";
// Must be encrypted; every optional processing permission is switched off.
258 r.require_encryption = true;
259 r.allow_pseudonymisation = false;
260 r.allow_aggregation = false;
261 r.allow_ml_training = false;
262 r.allow_export = false;
263 r.allow_logging = false;
264 ont.add_rule(r);
265 }
266
267 return ont;
268 }
269
270private:
271 static std::string classification_name(DataClassification c) {
272 switch (c) {
273 case DataClassification::PUBLIC: return "PUBLIC";
274 case DataClassification::INTERNAL: return "INTERNAL";
275 case DataClassification::RESTRICTED: return "RESTRICTED";
276 case DataClassification::HIGHLY_RESTRICTED: return "HIGHLY_RESTRICTED";
277 }
278 return "UNKNOWN";
279 }
280
281 std::string ontology_id_;
282 std::unordered_map<std::string, DataClassificationRule> rules_;
283};
284
285} // namespace signet::forge
A named collection of data classification rules forming a formal ontology.
const std::string & ontology_id() const
Ontology identifier.
DataClassificationRule lookup(const std::string &field_name) const
Look up the classification rule for a field.
DataClassificationOntology(const std::string &ontology_id="default")
Construct an ontology with the given identifier.
expected< void > validate_handling(const std::string &field_name, bool is_encrypted, bool is_pseudonymised, bool purpose_is_allowed=true) const
Validate that a field's actual handling meets classification requirements.
std::vector< DataClassificationRule > all_rules() const
Get all registered rules.
static DataClassificationOntology financial_default()
Build a default ontology with standard financial/compliance field rules.
size_t size() const
Number of registered rules.
void add_rule(const DataClassificationRule &rule)
Add a classification rule for a field.
A lightweight result type that holds either a success value of type T or an Error.
Definition error.hpp:145
RegulatoryRegime
Regulatory regime(s) applicable to the data.
@ SEC_17A4
SEC Rule 17a-4 (records retention)
@ PCI_DSS
Payment Card Industry Data Security Standard.
@ GDPR
EU General Data Protection Regulation.
@ DORA
Digital Operational Resilience Act.
@ MIFID2
Markets in Financial Instruments Directive II.
@ EU_AI_ACT
EU Artificial Intelligence Act.
@ SOX
Sarbanes-Oxley Act.
@ HIPAA
Health Insurance Portability and Accountability Act.
DataClassification
Data confidentiality level per DORA Art.8 + ISO 27001 Annex A.
@ RESTRICTED
Regulated data (GDPR, FCA, MiFID II)
@ INTERNAL
Business-internal, not for external sharing.
@ HIGHLY_RESTRICTED
Cryptographic keys, trading secrets, PII.
@ PUBLIC
No confidentiality requirement.
DataSensitivity
Data sensitivity per GDPR Art.9 special categories.
@ BIOMETRIC
Biometric data (Art.9 special category)
@ ANONYMISED
Irreversibly de-identified (Art.4(1))
@ NEUTRAL
No special sensitivity.
@ FINANCIAL_PII
Financial account data, trading activity.
@ HEALTH
Health/genetic data (Art.9 special category)
@ PII
Personally Identifiable Information.
@ PSEUDONYMISED
Identifiable only with additional key (Art.25)
@ INVALID_ARGUMENT
A caller-supplied argument is outside the valid range or violates a precondition.
Per-field data classification and handling policy.
bool allow_logging
Biometric, health → false in plaintext.
bool require_encryption
RESTRICTED/HIGHLY_RESTRICTED → true.
int64_t max_retention_ns
Max retention (default 5y)
bool allow_export
HIGHLY_RESTRICTED → false.
std::string field_name
Column/field path (e.g., "user.email", "price")
int64_t min_retention_ns
Minimum retention (0 = no min)
bool allow_ml_training
PII, secrets → false.
Lightweight error value carrying an ErrorCode and a human-readable message.
Definition error.hpp:101