6#if !defined(SIGNET_ENABLE_COMMERCIAL) || !SIGNET_ENABLE_COMMERCIAL
7#error "signet/ai/decision_log.hpp requires SIGNET_ENABLE_COMMERCIAL=ON (AGPL-3.0 commercial tier). See LICENSE_COMMERCIAL."
137 [[nodiscard]]
inline std::vector<uint8_t>
serialize()
const {
138 std::vector<uint8_t> buf;
145 append_le32(buf,
static_cast<uint32_t
>(
strategy_id));
156 append_float(buf, f);
166 append_le32(buf,
static_cast<uint32_t
>(
risk_result));
172 append_string(buf,
symbol);
175 append_double(buf,
price);
181 append_string(buf,
venue);
184 append_string(buf,
notes);
187 append_le32(buf,
static_cast<uint32_t
>(
buy_sell));
188 append_le32(buf,
static_cast<uint32_t
>(
order_type));
190 append_string(buf,
isin);
206 const uint8_t* data,
size_t size) {
213 if (!read_le32(data, size, offset, rec.
strategy_id)) {
221 if (!read_le32(data, size, offset, dt_val)) {
225 if (dt_val < 0 || dt_val > 7)
228 uint32_t feat_count = 0;
229 if (!read_le32_u(data, size, offset, feat_count)) {
232 if (feat_count > (size - offset) /
sizeof(float)) {
236 for (uint32_t i = 0; i < feat_count; ++i) {
242 if (!read_float(data, size, offset, rec.
model_output)) {
245 if (!read_float(data, size, offset, rec.
confidence)) {
250 if (!read_le32(data, size, offset, rr_val)) {
254 if (rr_val < 0 || rr_val > 3)
257 if (!read_string(data, size, offset, rec.
order_id)) {
260 if (!read_string(data, size, offset, rec.
symbol)) {
263 if (!read_double(data, size, offset, rec.
price)) {
266 if (!read_double(data, size, offset, rec.
quantity)) {
269 if (!read_string(data, size, offset, rec.
venue)) {
272 if (!read_string(data, size, offset, rec.
notes)) {
279 if (read_le32(data, size, offset, bs_val)) {
281 if (bs_val < 0 || bs_val > 2)
285 if (read_le32(data, size, offset, ot_val)) {
287 if (ot_val < 0 || (ot_val > 4 && ot_val != 99))
291 if (read_le32(data, size, offset, tif_val)) {
293 if (tif_val < 0 || (tif_val > 4 && tif_val != 99))
296 read_string(data, size, offset, rec.
isin);
297 read_string(data, size, offset, rec.
currency);
299 if (read_le32_u(data, size, offset, ssf))
302 if (read_le32_u(data, size, offset, agg))
305 if (read_le64(data, size, offset, vp))
/// Append the 32-bit value @p v to @p buf as four bytes, least-significant
/// byte first (little-endian), independent of host byte order.
/// @param buf Output buffer; grows by exactly four bytes.
/// @param v   Value to encode.
316 static inline void append_le32(std::vector<uint8_t>& buf, uint32_t v) {
// Byte 0: bits 0-7 (the cast to uint8_t keeps only the low 8 bits).
317 buf.push_back(
static_cast<uint8_t
>(v));
// Byte 1: bits 8-15.
318 buf.push_back(
static_cast<uint8_t
>(v >> 8));
// Byte 2: bits 16-23.
319 buf.push_back(
static_cast<uint8_t
>(v >> 16));
// Byte 3: bits 24-31.
320 buf.push_back(
static_cast<uint8_t
>(v >> 24));
/// Append the 64-bit value @p v to @p buf as eight bytes, least-significant
/// byte first (little-endian).
/// @param buf Output buffer; grows by exactly eight bytes.
/// @param v   Value to encode.
323 static inline void append_le64(std::vector<uint8_t>& buf, uint64_t v) {
// Iteration i emits bits [8*i, 8*i+7]; the uint8_t cast drops the higher bits.
324 for (
int i = 0; i < 8; ++i) {
325 buf.push_back(
static_cast<uint8_t
>(v >> (i * 8)));
/// Append a float to @p buf by copying its 4-byte object representation into
/// an integer and emitting that integer with append_le32().
/// @param buf Output buffer.
/// @param v   Value to encode.
329 static inline void append_float(std::vector<uint8_t>& buf,
float v) {
// memcpy is used for the float -> integer reinterpretation instead of a
// pointer cast. NOTE(review): the declaration of `bits` is not visible in this
// excerpt; presumably a uint32_t, matching append_le32's parameter — confirm.
331 std::memcpy(&bits, &v, 4);
332 append_le32(buf, bits);
/// Append a double to @p buf by copying its 8-byte object representation into
/// an integer and emitting that integer with append_le64().
/// @param buf Output buffer.
/// @param v   Value to encode.
335 static inline void append_double(std::vector<uint8_t>& buf,
double v) {
// memcpy is used for the double -> integer reinterpretation instead of a
// pointer cast. NOTE(review): the declaration of `bits` is not visible in this
// excerpt; presumably a uint64_t, matching append_le64's parameter — confirm.
337 std::memcpy(&bits, &v, 8);
338 append_le64(buf, bits);
/// Append a length-prefixed string to @p buf: a 32-bit little-endian byte
/// count (via append_le32) followed by that many bytes of @p s.
/// @param buf Output buffer.
/// @param s   String to encode; at most UINT32_MAX bytes of it are written.
341 static inline void append_string(std::vector<uint8_t>& buf,
const std::string& s) {
// Clamp so the length always fits the 32-bit prefix; a longer string is
// truncated rather than written with a wrapped length. The parenthesised
// (std::min) presumably guards against a function-like `min` macro.
343 const size_t clamped = (std::min)(s.size(),
static_cast<size_t>(UINT32_MAX));
344 append_le32(buf,
static_cast<uint32_t
>(clamped));
// Payload: exactly `clamped` bytes, so prefix and payload always agree.
345 buf.insert(buf.end(), s.begin(), s.begin() +
static_cast<ptrdiff_t
>(clamped));
/// Decode a little-endian 64-bit integer from @p data at @p offset into @p out.
/// @param data   Input buffer.
/// @param size   Number of valid bytes in @p data.
/// @param offset Read position within @p data.
/// @param out    Receives the decoded value, converted to int64_t.
/// @return false when fewer than 8 bytes are available at @p offset.
/// NOTE(review): the statements that advance @p offset and return success are
/// not visible in this excerpt — confirm against the full file.
350 static inline bool read_le64(
const uint8_t* data,
size_t size,
size_t& offset, int64_t& out) {
// Bounds check before touching data[offset .. offset+7].
351 if (offset + 8 > size)
return false;
// Byte i contributes bits [8*i, 8*i+7]; it is widened to uint64_t before the
// shift so the shift is performed in 64 bits.
353 for (
int i = 0; i < 8; ++i) {
354 v |=
static_cast<uint64_t
>(data[offset + i]) << (i * 8);
356 out =
static_cast<int64_t
>(v);
/// Decode a little-endian 32-bit integer from @p data at @p offset into the
/// signed @p out.
/// @param data   Input buffer.
/// @param size   Number of valid bytes in @p data.
/// @param offset Read position within @p data.
/// @param out    Receives the decoded value, converted to int32_t.
/// @return false when fewer than 4 bytes are available at @p offset.
/// NOTE(review): the statements that advance @p offset and return success are
/// not visible in this excerpt — confirm against the full file.
361 static inline bool read_le32(
const uint8_t* data,
size_t size,
size_t& offset, int32_t& out) {
// Bounds check before touching data[offset .. offset+3].
362 if (offset + 4 > size)
return false;
// Accumulate unsigned, then convert once at the end; each byte is widened to
// uint32_t before shifting.
364 for (
int i = 0; i < 4; ++i) {
365 v |=
static_cast<uint32_t
>(data[offset + i]) << (i * 8);
367 out =
static_cast<int32_t
>(v);
/// Decode a little-endian unsigned 32-bit integer from @p data at @p offset
/// directly into @p out.
/// @param data   Input buffer.
/// @param size   Number of valid bytes in @p data.
/// @param offset Read position within @p data.
/// @param out    Receives the decoded value.
/// @return false when fewer than 4 bytes are available at @p offset.
372 static inline bool read_le32_u(
const uint8_t* data,
size_t size,
size_t& offset, uint32_t& out) {
// Bounds check before touching data[offset .. offset+3].
373 if (offset + 4 > size)
return false;
// NOTE(review): the bytes are OR-ed straight into `out`, so `out` must be
// zero before this loop or stale bits from the caller survive. The statement
// that clears it is not visible in this excerpt — confirm it exists.
375 for (
int i = 0; i < 4; ++i) {
376 out |=
static_cast<uint32_t
>(data[offset + i]) << (i * 8);
/// Decode a float from @p data at @p offset: four little-endian bytes are
/// assembled into an integer whose object representation is copied into @p out.
/// @param data   Input buffer.
/// @param size   Number of valid bytes in @p data.
/// @param offset Read position within @p data.
/// @param out    Receives the decoded value.
/// @return false when fewer than 4 bytes are available at @p offset.
/// NOTE(review): the statements that advance @p offset and return success are
/// not visible in this excerpt — confirm against the full file.
382 static inline bool read_float(
const uint8_t* data,
size_t size,
size_t& offset,
float& out) {
// Bounds check before touching data[offset .. offset+3].
383 if (offset + 4 > size)
return false;
385 for (
int i = 0; i < 4; ++i) {
386 bits |=
static_cast<uint32_t
>(data[offset + i]) << (i * 8);
// Mirror of append_float(): integer bits -> float via memcpy, not a cast.
388 std::memcpy(&out, &bits, 4);
/// Decode a double from @p data at @p offset: eight little-endian bytes are
/// assembled into an integer whose object representation is copied into @p out.
/// @param data   Input buffer.
/// @param size   Number of valid bytes in @p data.
/// @param offset Read position within @p data.
/// @param out    Receives the decoded value.
/// @return false when fewer than 8 bytes are available at @p offset.
/// NOTE(review): the statements that advance @p offset and return success are
/// not visible in this excerpt — confirm against the full file.
393 static inline bool read_double(
const uint8_t* data,
size_t size,
size_t& offset,
double& out) {
// Bounds check before touching data[offset .. offset+7].
394 if (offset + 8 > size)
return false;
396 for (
int i = 0; i < 8; ++i) {
397 bits |=
static_cast<uint64_t
>(data[offset + i]) << (i * 8);
// Mirror of append_double(): integer bits -> double via memcpy, not a cast.
399 std::memcpy(&out, &bits, 8);
/// Upper bound on a decoded string's length prefix: 16 * 1024 * 1024 bytes
/// (16 MiB). read_string() rejects any prefix larger than this.
404 static constexpr uint32_t MAX_STRING_LEN = 16u * 1024u * 1024u;
/// Decode a length-prefixed string (the format written by append_string()):
/// a 32-bit little-endian length read with read_le32_u(), then that many bytes.
/// @param data   Input buffer.
/// @param size   Number of valid bytes in @p data.
/// @param offset Read position within @p data.
/// @param out    Receives the decoded bytes.
/// @return false when the length prefix cannot be read, exceeds
///         MAX_STRING_LEN, or claims more bytes than remain in the buffer.
/// NOTE(review): the statements that advance @p offset past the payload and
/// return success are not visible in this excerpt — confirm against the full file.
406 static inline bool read_string(
const uint8_t* data,
size_t size,
size_t& offset, std::string& out) {
408 if (!read_le32_u(data, size, offset, len))
return false;
// Reject oversized prefixes before using `len` for anything else.
409 if (len > MAX_STRING_LEN)
return false;
// The payload must lie entirely inside the buffer.
410 if (offset + len > size)
return false;
411 out.assign(
reinterpret_cast<const char*
>(data + offset), len);
/// Render a feature vector as a JSON-style array string.
/// @param features Values to render.
/// @return "[]" for an empty vector; otherwise a string that starts with "["
///         and holds the values separated by ','.
/// NOTE(review): the lines that append each formatted value and close the
/// array are not visible in this excerpt — confirm against the full file.
425inline std::string features_to_json(
const std::vector<float>& features) {
// Fast path: nothing to format.
426 if (features.empty())
return "[]";
428 std::string result =
"[";
429 for (
size_t i = 0; i < features.size(); ++i) {
// Separator goes before every element except the first.
430 if (i > 0) result +=
',';
// Each value is widened to double and printed with "%.8g", i.e. up to
// 8 significant digits.
433 std::snprintf(buf,
sizeof(buf),
"%.8g",
static_cast<double>(features[i]));
446inline std::vector<float> json_to_features(
const std::string& json) {
449 static constexpr size_t MAX_JSON_ARRAY_ELEMENTS = 1'000'000;
450 std::vector<float> result;
451 if (json.size() < 2 || json.front() !=
'[' || json.back() !=
']') {
456 size_t end = json.size() - 1;
459 if (result.size() >= MAX_JSON_ARRAY_ELEMENTS)
break;
461 while (pos < end && (json[pos] ==
' ' || json[pos] ==
'\t')) ++pos;
462 if (pos >= end)
break;
465 size_t num_start = pos;
466 while (pos < end && json[pos] !=
',') ++pos;
469 std::string num_str = json.substr(num_start, pos - num_start);
471 while (!num_str.empty() && (num_str.back() ==
' ' || num_str.back() ==
'\t')) {
474 if (!num_str.empty()) {
476 result.push_back(std::stof(num_str));
483 if (pos < end && json[pos] ==
',') ++pos;
514 .column<int32_t>(
"strategy_id")
515 .
column<std::string>(
"model_version")
516 .column<int32_t>(
"decision_type")
517 .
column<std::string>(
"input_features")
518 .column<double>(
"model_output")
519 .
column<
double>(
"confidence")
520 .column<int32_t>(
"risk_result")
521 .
column<std::string>(
"order_id")
522 .column<std::string>(
"symbol")
524 .column<double>(
"quantity")
525 .
column<std::string>(
"venue")
526 .column<std::string>(
"notes")
527 .
column<int64_t>(
"chain_seq")
528 .column<std::string>(
"chain_hash")
529 .
column<std::string>(
"prev_hash")
530 .column<int64_t>(
"row_id")
531 .
column<int32_t>(
"row_version")
532 .column<std::string>(
"row_origin_file")
533 .
column<std::string>(
"row_prev_hash")
560 const std::string& chain_id =
"",
561 size_t max_records = 100000)
562 : output_dir_(output_dir)
564 , max_records_(max_records)
566 , lineage_tracker_(chain_id.empty() ? chain_id_ : chain_id, 1)
568 auto license = commercial::require_feature(
"DecisionLogWriter");
570 throw std::runtime_error(license.error().message);
581 if (output_dir_.empty())
582 throw std::invalid_argument(
"DecisionLogWriter: output_dir must not be empty");
583 for (
size_t s = 0, e; s <= output_dir_.size(); s = e + 1) {
584 e = output_dir_.find_first_of(
"/\\", s);
585 if (e == std::string::npos) e = output_dir_.size();
586 if (output_dir_.substr(s, e - s) ==
"..")
587 throw std::invalid_argument(
588 "DecisionLogWriter: output_dir must not contain '..' path traversal");
591 for (
char c : chain_id_) {
592 if (!std::isalnum(
static_cast<unsigned char>(c)) && c !=
'_' && c !=
'-')
593 throw std::invalid_argument(
594 "DecisionLogWriter: chain_id must only contain [a-zA-Z0-9_-]");
602 instrument_validator_ = std::move(validator);
611 auto usage = commercial::record_usage_rows(
"DecisionLogWriter::log", 1);
612 if (!usage)
return usage.error();
615 if (instrument_validator_ && !instrument_validator_(record.
symbol)) {
617 "DecisionLog: invalid symbol '" + record.
symbol +
"' (MiFID II RTS 24)"};
630 auto entry = chain_.
append(data.data(), data.size(), ts);
633 pending_records_.push_back(record);
634 pending_entries_.push_back(entry);
635 pending_data_.push_back(std::move(data));
638 if (pending_records_.size() >= max_records_) {
639 auto result =
flush();
640 if (!result)
return result.error();
653 if (pending_records_.empty()) {
658 int64_t start_seq = pending_entries_.front().sequence_number;
659 int64_t end_seq = pending_entries_.back().sequence_number;
661 current_file_path_ = output_dir_ +
"/decision_log_" + chain_id_ +
"_"
662 + std::to_string(start_seq) +
"_"
663 + std::to_string(end_seq) +
".parquet";
667 opts.
created_by =
"SignetStack signet-forge decision_log v1.0";
671 auto meta_kvs = meta.to_key_values();
672 for (
auto& [k, v] : meta_kvs) {
678 if (!writer_result)
return writer_result.error();
679 auto& writer = *writer_result;
682 size_t n = pending_records_.size();
683 for (
size_t i = 0; i < n; ++i) {
684 const auto& rec = pending_records_[i];
685 const auto& entry = pending_entries_[i];
688 const auto& row_data = pending_data_[i];
689 auto lineage = lineage_tracker_.
next(row_data.data(), row_data.size());
691 std::vector<std::string> row;
694 row.push_back(std::to_string(rec.timestamp_ns));
695 row.push_back(std::to_string(rec.strategy_id));
696 row.push_back(rec.model_version);
697 row.push_back(std::to_string(
static_cast<int32_t
>(rec.decision_type)));
698 row.push_back(detail::features_to_json(rec.input_features));
699 row.push_back(double_to_string(
static_cast<double>(rec.model_output)));
700 row.push_back(double_to_string(
static_cast<double>(rec.confidence)));
701 row.push_back(std::to_string(
static_cast<int32_t
>(rec.risk_result)));
702 row.push_back(rec.order_id);
703 row.push_back(rec.symbol);
704 row.push_back(double_to_string(rec.price));
705 row.push_back(double_to_string(rec.quantity));
706 row.push_back(rec.venue);
707 row.push_back(rec.notes);
708 row.push_back(std::to_string(entry.sequence_number));
711 row.push_back(std::to_string(lineage.row_id));
712 row.push_back(std::to_string(lineage.row_version));
713 row.push_back(lineage.row_origin_file);
714 row.push_back(lineage.row_prev_hash);
716 auto write_result = writer.write_row(row);
717 if (!write_result)
return write_result.error();
721 auto close_result = writer.close();
722 if (!close_result)
return close_result.error();
725 pending_records_.clear();
726 pending_entries_.clear();
727 pending_data_.clear();
735 if (!pending_records_.empty()) {
749 if (!pending_entries_.empty()) {
751 meta.
end_sequence = pending_entries_.back().sequence_number;
755 }
else if (!chain_.
entries().empty()) {
757 const auto& last = chain_.
entries().back();
764 meta.
record_count =
static_cast<int64_t
>(pending_entries_.size());
771 return pending_records_.size();
776 return total_records_;
781 return current_file_path_;
785 std::string output_dir_;
786 std::string chain_id_;
790 std::vector<DecisionRecord> pending_records_;
791 std::vector<HashChainEntry> pending_entries_;
792 std::vector<std::vector<uint8_t>> pending_data_;
794 std::string current_file_path_;
795 int64_t total_records_{0};
796 int64_t file_count_{0};
797 std::function<bool(
const std::string&)> instrument_validator_;
/// Format a double as text using the "%.17g" printf conversion.
/// @param v Value to format.
/// NOTE(review): the declaration of `buf` and the return statement are not
/// visible in this excerpt — confirm the buffer size against the full file.
800 static inline std::string double_to_string(
double v) {
// 17 significant digits are enough to round-trip an IEEE-754 binary64 value
// through text.
802 std::snprintf(buf,
sizeof(buf),
"%.17g", v);
821 auto license = commercial::require_feature(
"DecisionLogReader");
822 if (!license)
return license.error();
825 if (!reader_result)
return reader_result.error();
828 dlr.reader_ = std::make_unique<ParquetReader>(std::move(*reader_result));
832 auto load_result = dlr.load_columns();
833 if (!load_result)
return load_result.
error();
851 auto validation = validate_loaded_columns();
852 if (!validation)
return validation.error();
854 size_t n = col_timestamp_ns_.size();
855 std::vector<DecisionRecord> records;
858 for (
size_t i = 0; i < n; ++i) {
864 if (col_decision_type_[i] < 0 || col_decision_type_[i] > 7)
866 rec.
input_features = detail::json_to_features(col_input_features_[i]);
867 rec.
model_output =
static_cast<float>(col_model_output_[i]);
868 rec.
confidence =
static_cast<float>(col_confidence_[i]);
870 if (col_risk_result_[i] < 0 || col_risk_result_[i] > 3)
873 rec.
symbol = col_symbol_[i];
874 rec.
price = col_price_[i];
876 rec.
venue = col_venue_[i];
877 rec.
notes = col_notes_[i];
878 records.push_back(std::move(rec));
888 const auto& kvs = reader_->key_value_metadata();
891 for (
const auto& kv : kvs) {
892 if (!kv.value.has_value())
continue;
893 const auto& val = *kv.value;
895 if (kv.key ==
"signetstack.audit.chain_id") meta.
chain_id = val;
896 else if (kv.key ==
"signetstack.audit.first_seq") {
try { meta.
start_sequence = std::stoll(val); }
catch (...) {} }
897 else if (kv.key ==
"signetstack.audit.last_seq") {
try { meta.
end_sequence = std::stoll(val); }
catch (...) {} }
898 else if (kv.key ==
"signetstack.audit.first_hash") meta.
first_hash = val;
899 else if (kv.key ==
"signetstack.audit.last_hash") meta.
last_hash = val;
900 else if (kv.key ==
"signetstack.audit.prev_file_hash") meta.
prev_file_hash = val;
901 else if (kv.key ==
"signetstack.audit.record_count") {
try { meta.
record_count = std::stoll(val); }
catch (...) {} }
902 else if (kv.key ==
"signetstack.audit.record_type") meta.
record_type = val;
917 auto validation = validate_loaded_columns();
928 std::vector<HashChainEntry> entries;
929 size_t n = col_chain_seq_.size();
932 for (
size_t i = 0; i < n; ++i) {
944 bad.
error_message = !eh ?
"entry_hash deserialization failed at record "
946 :
"prev_hash deserialization failed at record "
959 if (col_decision_type_[i] < 0 || col_decision_type_[i] > 7)
961 rec.
input_features = detail::json_to_features(col_input_features_[i]);
962 rec.
model_output =
static_cast<float>(col_model_output_[i]);
963 rec.
confidence =
static_cast<float>(col_confidence_[i]);
965 if (col_risk_result_[i] < 0 || col_risk_result_[i] > 3)
968 rec.
symbol = col_symbol_[i];
969 rec.
price = col_price_[i];
971 rec.
venue = col_venue_[i];
972 rec.
notes = col_notes_[i];
977 entries.push_back(std::move(entry));
982 return verifier.
verify(entries);
987 return reader_->schema();
992 return reader_->num_rows();
996 std::unique_ptr<ParquetReader> reader_;
1000 std::vector<int64_t> col_timestamp_ns_;
1001 std::vector<int32_t> col_strategy_id_;
1002 std::vector<std::string> col_model_version_;
1003 std::vector<int32_t> col_decision_type_;
1004 std::vector<std::string> col_input_features_;
1005 std::vector<double> col_model_output_;
1006 std::vector<double> col_confidence_;
1007 std::vector<int32_t> col_risk_result_;
1008 std::vector<std::string> col_order_id_;
1009 std::vector<std::string> col_symbol_;
1010 std::vector<double> col_price_;
1011 std::vector<double> col_quantity_;
1012 std::vector<std::string> col_venue_;
1013 std::vector<std::string> col_notes_;
1014 std::vector<int64_t> col_chain_seq_;
1015 std::vector<std::string> col_chain_hash_;
1016 std::vector<std::string> col_prev_hash_;
/// Check that every loaded column vector has the same number of rows as
/// col_timestamp_ns_, which serves as the reference column.
/// @return An empty expected<void> when all sizes agree; otherwise the result
///         of mismatch() for the first column (in the order tested below)
///         whose size differs.
1018 [[nodiscard]]
inline expected<void> validate_loaded_columns()
const {
// Reference row count that all other columns are compared against.
1019 const size_t expected_rows = col_timestamp_ns_.size();
// Builds the failure value; its message names the column, the file path
// (path_), the expected row count and the actual row count.
1020 const auto mismatch = [&](
const char* column_name,
size_t actual_rows)
1024 "DecisionLogReader: column '" + std::string(column_name) +
1025 "' row count mismatch in '" + path_ +
"' (expected " +
1026 std::to_string(expected_rows) +
", got " +
1027 std::to_string(actual_rows) +
")"};
// One check per column; the first mismatch wins.
1030 if (col_strategy_id_.size() != expected_rows)
1031 return mismatch(
"strategy_id", col_strategy_id_.size());
1032 if (col_model_version_.size() != expected_rows)
1033 return mismatch(
"model_version", col_model_version_.size());
1034 if (col_decision_type_.size() != expected_rows)
1035 return mismatch(
"decision_type", col_decision_type_.size());
1036 if (col_input_features_.size() != expected_rows)
1037 return mismatch(
"input_features", col_input_features_.size());
1038 if (col_model_output_.size() != expected_rows)
1039 return mismatch(
"model_output", col_model_output_.size());
1040 if (col_confidence_.size() != expected_rows)
1041 return mismatch(
"confidence", col_confidence_.size());
1042 if (col_risk_result_.size() != expected_rows)
1043 return mismatch(
"risk_result", col_risk_result_.size());
1044 if (col_order_id_.size() != expected_rows)
1045 return mismatch(
"order_id", col_order_id_.size());
1046 if (col_symbol_.size() != expected_rows)
1047 return mismatch(
"symbol", col_symbol_.size());
1048 if (col_price_.size() != expected_rows)
1049 return mismatch(
"price", col_price_.size());
1050 if (col_quantity_.size() != expected_rows)
1051 return mismatch(
"quantity", col_quantity_.size());
1052 if (col_venue_.size() != expected_rows)
1053 return mismatch(
"venue", col_venue_.size());
1054 if (col_notes_.size() != expected_rows)
1055 return mismatch(
"notes", col_notes_.size());
1056 if (col_chain_seq_.size() != expected_rows)
1057 return mismatch(
"chain_seq", col_chain_seq_.size());
1058 if (col_chain_hash_.size() != expected_rows)
1059 return mismatch(
"chain_hash", col_chain_hash_.size());
1060 if (col_prev_hash_.size() != expected_rows)
1061 return mismatch(
"prev_hash", col_prev_hash_.size());
// All columns agree.
1063 return expected<void>{};
/// Read columns 0 through 16 of every row group from reader_ and append their
/// values to the matching col_* member vectors.
///
/// Column index -> member (element type):
///   0 col_timestamp_ns_ (int64_t)      1 col_strategy_id_ (int32_t)
///   2 col_model_version_ (string)      3 col_decision_type_ (int32_t)
///   4 col_input_features_ (string)     5 col_model_output_ (double)
///   6 col_confidence_ (double)         7 col_risk_result_ (int32_t)
///   8 col_order_id_ (string)           9 col_symbol_ (string)
///  10 col_price_ (double)             11 col_quantity_ (double)
///  12 col_venue_ (string)             13 col_notes_ (string)
///  14 col_chain_seq_ (int64_t)        15 col_chain_hash_ (string)
///  16 col_prev_hash_ (string)
///
/// @return The error of the first read_column() call that fails; otherwise the
///         result of validate_loaded_columns().
1067 [[nodiscard]]
inline expected<void> load_columns() {
1068 int64_t num_rgs = reader_->num_row_groups();
1070 for (int64_t rg = 0; rg < num_rgs; ++rg) {
// read_column() is called with a size_t row-group index.
1071 size_t rg_idx =
static_cast<size_t>(rg);
// Numeric columns are appended by copy.
1074 auto r0 = reader_->read_column<int64_t>(rg_idx, 0);
1075 if (!r0)
return r0.error();
1076 col_timestamp_ns_.insert(col_timestamp_ns_.end(), r0->begin(), r0->end());
1078 auto r1 = reader_->read_column<int32_t>(rg_idx, 1);
1079 if (!r1)
return r1.error();
1080 col_strategy_id_.insert(col_strategy_id_.end(), r1->begin(), r1->end());
// String columns are appended through move iterators, so the strings are
// moved out of the temporary read result rather than copied.
1082 auto r2 = reader_->read_column<std::string>(rg_idx, 2);
1083 if (!r2)
return r2.error();
1084 col_model_version_.insert(col_model_version_.end(),
1085 std::make_move_iterator(r2->begin()), std::make_move_iterator(r2->end()));
1087 auto r3 = reader_->read_column<int32_t>(rg_idx, 3);
1088 if (!r3)
return r3.error();
1089 col_decision_type_.insert(col_decision_type_.end(), r3->begin(), r3->end());
1091 auto r4 = reader_->read_column<std::string>(rg_idx, 4);
1092 if (!r4)
return r4.error();
1093 col_input_features_.insert(col_input_features_.end(),
1094 std::make_move_iterator(r4->begin()), std::make_move_iterator(r4->end()));
1096 auto r5 = reader_->read_column<
double>(rg_idx, 5);
1097 if (!r5)
return r5.error();
1098 col_model_output_.insert(col_model_output_.end(), r5->begin(), r5->end());
1100 auto r6 = reader_->read_column<
double>(rg_idx, 6);
1101 if (!r6)
return r6.error();
1102 col_confidence_.insert(col_confidence_.end(), r6->begin(), r6->end());
1104 auto r7 = reader_->read_column<int32_t>(rg_idx, 7);
1105 if (!r7)
return r7.error();
1106 col_risk_result_.insert(col_risk_result_.end(), r7->begin(), r7->end());
1108 auto r8 = reader_->read_column<std::string>(rg_idx, 8);
1109 if (!r8)
return r8.error();
1110 col_order_id_.insert(col_order_id_.end(),
1111 std::make_move_iterator(r8->begin()), std::make_move_iterator(r8->end()));
1113 auto r9 = reader_->read_column<std::string>(rg_idx, 9);
1114 if (!r9)
return r9.error();
1115 col_symbol_.insert(col_symbol_.end(),
1116 std::make_move_iterator(r9->begin()), std::make_move_iterator(r9->end()));
1118 auto r10 = reader_->read_column<
double>(rg_idx, 10);
1119 if (!r10)
return r10.error();
1120 col_price_.insert(col_price_.end(), r10->begin(), r10->end());
1122 auto r11 = reader_->read_column<
double>(rg_idx, 11);
1123 if (!r11)
return r11.error();
1124 col_quantity_.insert(col_quantity_.end(), r11->begin(), r11->end());
1126 auto r12 = reader_->read_column<std::string>(rg_idx, 12);
1127 if (!r12)
return r12.error();
1128 col_venue_.insert(col_venue_.end(),
1129 std::make_move_iterator(r12->begin()), std::make_move_iterator(r12->end()));
1131 auto r13 = reader_->read_column<std::string>(rg_idx, 13);
1132 if (!r13)
return r13.error();
1133 col_notes_.insert(col_notes_.end(),
1134 std::make_move_iterator(r13->begin()), std::make_move_iterator(r13->end()));
// Columns 14-16 carry the hash-chain data (sequence, entry hash, prev hash).
1136 auto r14 = reader_->read_column<int64_t>(rg_idx, 14);
1137 if (!r14)
return r14.error();
1138 col_chain_seq_.insert(col_chain_seq_.end(), r14->begin(), r14->end());
1140 auto r15 = reader_->read_column<std::string>(rg_idx, 15);
1141 if (!r15)
return r15.error();
1142 col_chain_hash_.insert(col_chain_hash_.end(),
1143 std::make_move_iterator(r15->begin()), std::make_move_iterator(r15->end()));
1145 auto r16 = reader_->read_column<std::string>(rg_idx, 16);
1146 if (!r16)
return r16.error();
1147 col_prev_hash_.insert(col_prev_hash_.end(),
1148 std::make_move_iterator(r16->begin()), std::make_move_iterator(r16->end()));
// Final consistency check: every column must have the same row count.
1151 return validate_loaded_columns();
Verifies hash chain integrity.
static VerificationResult verify(const uint8_t *chain_data, size_t chain_size)
Verify a chain from serialized bytes.
Builds SHA-256 hash chains during Parquet writes.
const std::vector< HashChainEntry > & entries() const
Return a const reference to the internal entry list.
HashChainEntry append(const uint8_t *record_data, size_t record_size, int64_t timestamp_ns)
Append a record to the chain with an explicit timestamp.
Reads AI decision log Parquet files and verifies hash chain integrity.
DecisionLogReader & operator=(DecisionLogReader &&)=default
static expected< DecisionLogReader > open(const std::string &path)
Open a decision log Parquet file and pre-load all column data.
DecisionLogReader(DecisionLogReader &&)=default
DecisionLogReader & operator=(const DecisionLogReader &)=delete
DecisionLogReader(const DecisionLogReader &)=delete
DecisionLogReader()=default
expected< std::vector< DecisionRecord > > read_all() const
Get all decision records from the file.
AuditChainVerifier::VerificationResult verify_chain() const
Verify the hash chain integrity by re-hashing each record and checking chain continuity.
const Schema & schema() const
Get the schema of the decision log file.
int64_t num_records() const
Number of records in the file.
expected< AuditMetadata > audit_metadata() const
Get the audit chain metadata from the Parquet file's key-value metadata.
Writes AI trading decision records to Parquet files with cryptographic hash chaining for tamper evidence.
expected< void > close()
Close the writer (flushes remaining records).
size_t pending_records() const
Get the number of records in the current (unflushed) batch.
int64_t total_records() const
Get the total number of records written across all files.
AuditMetadata current_metadata() const
Get the chain metadata for the current batch.
void set_instrument_validator(std::function< bool(const std::string &)> validator)
Set an optional symbol/instrument validator callback (MiFID II RTS 24).
std::string current_file_path() const
Get the file path of the current (or last written) output file.
DecisionLogWriter(const std::string &output_dir, const std::string &chain_id="", size_t max_records=100000)
Create a decision log writer.
expected< void > flush()
Flush current records to a Parquet file.
expected< HashChainEntry > log(const DecisionRecord &record)
Log a trading decision.
static expected< ParquetReader > open(const std::filesystem::path &path)
Open and parse a Parquet file, returning a ready-to-query reader.
static expected< ParquetWriter > open(const std::filesystem::path &path, const Schema &schema, const Options &options=Options{})
Open a new Parquet file for writing.
Per-row lineage tracking inspired by Iceberg V3-style data governance.
RowLineage next(const uint8_t *row_data, size_t row_size)
Generate lineage for the next row.
SchemaBuilder & column(std::string col_name, LogicalType logical_type=LogicalType::NONE)
Add a typed column, deducing PhysicalType from T.
Immutable schema description for a Parquet file.
static SchemaBuilder builder(std::string name)
Create a SchemaBuilder for fluent column construction.
const Error & error() const
Access the error payload (valid for both success and failure; check ok() on the returned Error).
A lightweight result type that holds either a success value of type T or an Error.
std::array< uint8_t, 32 > sha256(const uint8_t *data, size_t size)
Compute SHA-256 hash of arbitrary-length input.
int64_t now_ns()
Return the current time as nanoseconds since the Unix epoch (UTC).
expected< std::array< uint8_t, 32 > > hex_to_hash(const std::string &hex)
Convert a 64-character lowercase hex string back to a 32-byte hash.
RiskGateResult
Outcome of the pre-trade risk gate evaluation.
@ PASSED
All risk checks passed.
@ MODIFIED
Order modified by risk gate (e.g., size reduced)
@ REJECTED
Order rejected by risk gate.
@ THROTTLED
Order delayed by rate limiting.
TimeInForce
Time-in-force classification for MiFID II RTS 24 Annex I Table 2 Field 8.
@ DAY
Day order (valid until end of trading day)
@ IOC
Immediate-Or-Cancel.
@ GTC
Good-Till-Cancelled.
BuySellIndicator
Buy/sell direction for MiFID II RTS 24 Annex I Table 2 Field 6.
@ SHORT_SELL
Short selling (RTS 24 Annex I Field 16)
OrderType
Order type classification for MiFID II RTS 24 Annex I Table 2 Field 7.
@ STOP_LIMIT
Stop-limit order.
DecisionType
Classification of the AI-driven trading decision.
@ NO_ACTION
Model evaluated but no action taken.
@ RISK_OVERRIDE
Risk gate override/rejection.
@ ORDER_NEW
Decision to submit a new order.
@ ORDER_CANCEL
Decision to cancel an existing order.
@ SIGNAL
Raw model signal/prediction.
@ ORDER_MODIFY
Decision to modify an existing order.
@ POSITION_CLOSE
Decision to close a position.
@ POSITION_OPEN
Decision to open a position.
@ TIMESTAMP_NS
Timestamp — INT64, nanoseconds since Unix epoch.
std::string hash_to_hex(const std::array< uint8_t, 32 > &hash)
Convert a 32-byte SHA-256 hash to a lowercase hexadecimal string (64 chars).
@ INVALID_ARGUMENT
A caller-supplied argument is outside the valid range or violates a precondition.
@ CORRUPT_PAGE
A data page failed integrity checks (bad CRC, truncated, or exceeds size limits).
@ CORRUPT_DATA
Decoded data is corrupt or inconsistent (e.g. out-of-range dictionary index).
std::string generate_chain_id()
Generate a simple chain identifier based on the current timestamp.
Schema decision_log_schema()
Build the Parquet schema for AI decision log files.
Per-row lineage tracking (Iceberg V3-style) with monotonic row IDs, mutation versioning,...
Schema definition types: Column<T>, SchemaBuilder, and Schema.
Result of a full chain verification.
bool valid
True if the entire chain passed all integrity checks.
int64_t first_bad_index
Index of the first entry that failed verification, or -1 if all entries are valid.
int64_t entries_checked
Number of entries that were successfully verified before a failure was detected (or the total count if the whole chain is valid).
std::string error_message
Human-readable description of the verification outcome.
A single AI-driven trading decision with full provenance.
std::string symbol
Trading symbol.
std::string order_id
Associated order ID (empty if none)
std::string notes
Optional free-text notes.
double price
Decision price.
DecisionType decision_type
What type of decision.
std::string currency
Field 9: Currency (ISO 4217, 3 chars, e.g. "USD")
BuySellIndicator buy_sell
Field 6: Buy/sell direction.
RiskGateResult risk_result
Risk gate outcome.
float model_output
Primary model output (e.g., signal strength)
std::vector< uint8_t > serialize() const
Serialize the record to a deterministic byte sequence.
static expected< DecisionRecord > deserialize(const uint8_t *data, size_t size)
Reconstruct a DecisionRecord from its serialized byte representation.
std::vector< float > input_features
Input feature vector to the model.
std::string isin
Field 5: ISIN (ISO 6166, 12 chars)
std::string model_version
Model version hash or identifier.
std::string venue
Execution venue.
TimeInForce time_in_force
Field 8: Time-in-force.
std::string parent_order_id
R-17: Parent order for lifecycle linking.
int64_t timestamp_ns
Decision timestamp (nanoseconds since epoch)
double quantity
Decision quantity.
float confidence
Model confidence [0.0, 1.0].
OrderType order_type
Field 7: Order type.
int64_t validity_period_ns
Field 10: GTD validity timestamp (0=N/A)
int32_t strategy_id
Which strategy made this decision.
bool short_selling_flag
Field 16: Short selling indicator.
bool aggregated_order
Field 17: Aggregated order flag.
Lightweight error value carrying an ErrorCode and a human-readable message.
A single link in the cryptographic hash chain.
int64_t sequence_number
0-indexed position in the chain, monotonically increasing.
std::array< uint8_t, 32 > entry_hash
SHA-256 commitment over (sequence_number, timestamp_ns, prev_hash, data_hash).
int64_t timestamp_ns
Nanoseconds since Unix epoch when this entry was created.
std::array< uint8_t, 32 > prev_hash
SHA-256 hash of the previous entry (all zeros for the first entry, or a user-supplied continuation hash).
std::array< uint8_t, 32 > data_hash
SHA-256 hash of the record/row data that this entry covers.
Configuration options for ParquetWriter.
std::vector< thrift::KeyValue > file_metadata
Custom key-value metadata pairs embedded in the Parquet footer.
std::string created_by
Value written into the Parquet footer's "created_by" field.
Parquet KeyValue metadata entry (parquet.thrift field IDs 1-2).
Parquet format enumerations, type traits, and statistics structs.