Signet Forge 0.1.1
C++20 Parquet library with AI-native extensions
DEMO
Loading...
Searching...
No Matches
log_retention.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright 2026 Johnson Ogundeji
3// See LICENSE_COMMERCIAL for full terms.
4#pragma once
5
6#if !defined(SIGNET_ENABLE_COMMERCIAL) || !SIGNET_ENABLE_COMMERCIAL
7#error "signet/ai/log_retention.hpp requires SIGNET_ENABLE_COMMERCIAL=ON (AGPL-3.0 commercial tier). See LICENSE_COMMERCIAL."
8#endif
9
10// ---------------------------------------------------------------------------
11// log_retention.hpp -- Log Retention Lifecycle Manager
12//
13// EU AI Act Art.12(3) requires automatic recording of events over the
14// lifetime of the system with defined retention periods (minimum 6 months).
15// MiFID II RTS 24 Art.4 requires 5-year retention for order records.
16//
17// This module provides:
18// - RetentionPolicy: configurable TTL, archival, and size-based rotation
19// - LogRetentionManager: scans log directories and enforces retention
20// - Archival callback for moving expired logs to cold storage
21// - Deletion of logs past the maximum retention period
22// - Compliance summary reporting
23//
24// Header-only. Part of the signet::forge AI module.
25// ---------------------------------------------------------------------------
26
27#include "signet/error.hpp"
28
29#include <algorithm>
30#include <stdexcept>
31#include <chrono>
32#include <cstdint>
33#include <filesystem>
34#include <functional>
35#include <string>
36#include <vector>
37
38namespace signet::forge {
39
40// ---------------------------------------------------------------------------
41// RetentionPolicy
42// ---------------------------------------------------------------------------
43
50 int64_t min_retention_ns = INT64_C(15778800000000000); // ~6 months
51
55 int64_t max_retention_ns = INT64_C(157788000000000000); // ~5 years
56
61 int64_t archive_after_ns = INT64_C(31557600000000000); // ~1 year
62
66 uint64_t max_active_size_bytes = UINT64_C(10737418240); // 10 GB
67
71 uint64_t max_active_files = 10000;
72
76 std::string file_suffix = ".parquet";
77
80 bool enable_deletion = false;
81
84 bool enable_archival = false;
85};
86
87// ---------------------------------------------------------------------------
88// RetentionSummary
89// ---------------------------------------------------------------------------
90
94 int64_t files_scanned = 0;
95
97 int64_t files_active = 0;
98
100 int64_t files_archived = 0;
101
103 int64_t files_deleted = 0;
104
106 int64_t files_failed = 0;
107
109 uint64_t active_size_bytes = 0;
110
113
115 uint64_t deleted_size_bytes = 0;
116
118 bool dry_run = true;
119
121 std::vector<std::string> archived_paths;
122
124 std::vector<std::string> deleted_paths;
125
127 std::vector<std::string> errors;
128};
129
130// ---------------------------------------------------------------------------
131// LogRetentionManager
132// ---------------------------------------------------------------------------
133
152public:
156 using ArchiveCallback = std::function<bool(const std::string& source_path)>;
157
160 using PreDeleteCallback = std::function<bool(const std::string& path)>;
161
163 : policy_(std::move(policy)) {
164 auto gate = commercial::require_feature("LogRetentionManager");
165 if (!gate) throw std::runtime_error(gate.error().message);
166 }
167
170 archive_cb_ = std::move(cb);
171 }
172
175 pre_delete_cb_ = std::move(cb);
176 }
177
187 [[nodiscard]] inline RetentionSummary enforce(
188 const std::string& log_dir, int64_t now_ns) const {
189 RetentionSummary summary;
190 summary.dry_run = !(policy_.enable_archival || policy_.enable_deletion);
191
192 namespace fs = std::filesystem;
193
194 if (!fs::exists(log_dir) || !fs::is_directory(log_dir)) {
195 summary.errors.push_back("Directory does not exist: " + log_dir);
196 return summary;
197 }
198
199 // Collect matching files with their modification times
200 struct FileInfo {
201 std::string path;
202 int64_t mod_time_ns;
203 uint64_t size_bytes;
204 };
205 std::vector<FileInfo> files;
206
207 for (const auto& entry : fs::directory_iterator(log_dir)) {
208 if (!entry.is_regular_file()) continue;
209
210 const auto& path = entry.path();
211 if (!path.string().ends_with(policy_.file_suffix)) continue;
212
213 std::error_code ec;
214 auto mod_time = fs::last_write_time(path, ec);
215 if (ec) continue;
216
217 // Convert file_time_type to nanoseconds since epoch
218 auto sys_time = std::chrono::file_clock::to_sys(mod_time);
219 auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
220 sys_time.time_since_epoch()).count();
221
222 auto fsize = entry.file_size(ec);
223 if (ec) fsize = 0;
224
225 files.push_back({path.string(), static_cast<int64_t>(ns), fsize});
226 }
227
228 // Sort by modification time (oldest first)
229 std::sort(files.begin(), files.end(),
230 [](const FileInfo& a, const FileInfo& b) {
231 return a.mod_time_ns < b.mod_time_ns;
232 });
233
234 summary.files_scanned = static_cast<int64_t>(files.size());
235
236 for (const auto& fi : files) {
237 int64_t age_ns = now_ns - fi.mod_time_ns;
238
239 if (age_ns > policy_.max_retention_ns) {
240 // Past maximum retention — eligible for deletion
241 if (policy_.enable_deletion) {
242 bool allow = true;
243 if (pre_delete_cb_) {
244 allow = pre_delete_cb_(fi.path);
245 }
246 if (allow) {
247 std::error_code ec;
248 fs::remove(fi.path, ec);
249 if (ec) {
250 summary.errors.push_back("Failed to delete: " + fi.path
251 + " (" + ec.message() + ")");
252 ++summary.files_failed;
253 } else {
254 ++summary.files_deleted;
255 summary.deleted_size_bytes += fi.size_bytes;
256 summary.deleted_paths.push_back(fi.path);
257 }
258 } else {
259 ++summary.files_failed;
260 }
261 } else {
262 // Dry-run
263 ++summary.files_deleted;
264 summary.deleted_size_bytes += fi.size_bytes;
265 summary.deleted_paths.push_back(fi.path);
266 }
267 } else if (age_ns > policy_.archive_after_ns) {
268 // Past archival threshold — eligible for archival
269 if (policy_.enable_archival && archive_cb_) {
270 bool ok = archive_cb_(fi.path);
271 if (ok) {
272 ++summary.files_archived;
273 summary.archived_size_bytes += fi.size_bytes;
274 summary.archived_paths.push_back(fi.path);
275 } else {
276 summary.errors.push_back("Archive callback failed: " + fi.path);
277 ++summary.files_failed;
278 }
279 } else {
280 // Dry-run or no callback
281 ++summary.files_archived;
282 summary.archived_size_bytes += fi.size_bytes;
283 summary.archived_paths.push_back(fi.path);
284 }
285 } else {
286 // Active — within retention window
287 ++summary.files_active;
288 summary.active_size_bytes += fi.size_bytes;
289 }
290 }
291
292 // Check size-based overflow: if active files exceed max, archive oldest
293 if (summary.active_size_bytes > policy_.max_active_size_bytes) {
294 // Already sorted oldest-first — the active files are the ones
295 // not already in archived/deleted lists. This is informational
296 // in the summary for now; the caller can re-invoke with a
297 // shorter archive_after_ns to trigger size-based archival.
298 }
299
300 return summary;
301 }
302
/// Per-file record returned by list_files().
// NOTE(review): listing line 312 is absent from this view. list_files() assigns
// a `status` member of type FileStatus::Classification, so that enum and member
// are presumably declared there -- confirm against the real header.
308 struct FileStatus {
/// Path of the file, as produced by path.string() in list_files().
309 std::string path;
/// now_ns minus the file's modification time, in nanoseconds.
310 int64_t age_ns;
/// File size in bytes; list_files() stores 0 when the size cannot be read.
311 uint64_t size_bytes;
313 };
314
/// List the files in @p log_dir whose path ends with policy_.file_suffix,
/// together with their age, size and retention classification.
///
/// Only queries the filesystem (existence, type, modification time, size).
/// The directory is scanned non-recursively. Returns an empty vector when
/// @p log_dir does not exist or is not a directory. Files whose modification
/// time cannot be read are skipped.
///
/// @param log_dir Directory to scan.
/// @param now_ns  Current time in nanoseconds; each file's age is computed as
///                now_ns minus its modification time.
/// @return One FileStatus per matching file, sorted by descending age.
// NOTE(review): exists(), is_directory(), directory_iterator and
// is_regular_file() are used here without an error_code, so they can throw
// std::filesystem::filesystem_error.
315 [[nodiscard]] inline std::vector<FileStatus> list_files(
316 const std::string& log_dir, int64_t now_ns) const {
317 std::vector<FileStatus> result;
318 namespace fs = std::filesystem;
319
// A missing or non-directory path yields an empty result, not an error.
320 if (!fs::exists(log_dir) || !fs::is_directory(log_dir)) return result;
321
322 for (const auto& entry : fs::directory_iterator(log_dir)) {
323 if (!entry.is_regular_file()) continue;
324 const auto& path = entry.path();
// Literal suffix comparison against the full path string.
325 if (!path.string().ends_with(policy_.file_suffix)) continue;
326
327 std::error_code ec;
328 auto mod_time = fs::last_write_time(path, ec);
// Unreadable modification time: the file is silently left out.
329 if (ec) continue;
330
// Convert file_time_type to nanoseconds since the system-clock epoch.
331 auto sys_time = std::chrono::file_clock::to_sys(mod_time);
332 auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
333 sys_time.time_since_epoch()).count();
334
// An unreadable size is reported as 0 instead of dropping the file.
335 auto fsize = entry.file_size(ec);
336 if (ec) fsize = 0;
337
338 int64_t age = now_ns - static_cast<int64_t>(ns);
339
340 FileStatus fs_entry;
341 fs_entry.path = path.string();
342 fs_entry.age_ns = age;
343 fs_entry.size_bytes = fsize;
344
// Classify with the same thresholds enforce() uses.
// NOTE(review): listing lines 346 and 348 (the statements for the first two
// branches) are absent from this view; only the ACTIVE assignment is visible.
345 if (age > policy_.max_retention_ns)
347 else if (age > policy_.archive_after_ns)
349 else
350 fs_entry.status = FileStatus::Classification::ACTIVE;
351
352 result.push_back(std::move(fs_entry));
353 }
354
355 // Sort by age (oldest first)
356 std::sort(result.begin(), result.end(),
357 [](const FileStatus& a, const FileStatus& b) {
// Larger age sorts first, i.e. the oldest file leads.
358 return a.age_ns > b.age_ns;
359 });
360
361 return result;
362 }
363
/// Return a const reference to the stored policy_ member; never throws.
365 [[nodiscard]] const RetentionPolicy& policy() const noexcept { return policy_; }
366
367private:
368 RetentionPolicy policy_;
369 ArchiveCallback archive_cb_;
370 PreDeleteCallback pre_delete_cb_;
371};
372
373} // namespace signet::forge
Manages log file lifecycle: retention, archival, and deletion.
void set_pre_delete_callback(PreDeleteCallback cb)
Set the pre-deletion callback.
RetentionSummary enforce(const std::string &log_dir, int64_t now_ns) const
Enforce retention policy on a log directory.
std::function< bool(const std::string &source_path)> ArchiveCallback
Callback invoked to archive a file.
LogRetentionManager(RetentionPolicy policy={})
std::function< bool(const std::string &path)> PreDeleteCallback
Callback invoked before deleting a file.
void set_archive_callback(ArchiveCallback cb)
Set the archival callback.
std::vector< FileStatus > list_files(const std::string &log_dir, int64_t now_ns) const
const RetentionPolicy & policy() const noexcept
Get the current retention policy.
int64_t now_ns()
Return the current time as nanoseconds since the Unix epoch (UTC).
List all managed log files in a directory with their age classification.
enum signet::forge::LogRetentionManager::FileStatus::Classification status
Retention policy configuration for log lifecycle management.
std::string file_suffix
File name suffix to match (literal suffix comparison against the file path, e.g. ".parquet"; not a glob pattern).
int64_t max_retention_ns
Maximum retention period in nanoseconds.
uint64_t max_active_files
Maximum number of active log files.
bool enable_deletion
If true, actually delete files past max_retention_ns.
uint64_t max_active_size_bytes
Maximum total size of active (non-archived) log files in bytes.
int64_t archive_after_ns
Archival threshold in nanoseconds.
int64_t min_retention_ns
Minimum retention period in nanoseconds.
bool enable_archival
If true, actually archive files past archive_after_ns.
Summary of a retention enforcement pass.
bool dry_run
Whether the enforcement pass was a dry-run (true only when both enable_archival and enable_deletion are false).
std::vector< std::string > deleted_paths
Paths of files that were deleted (or would be deleted when enable_deletion is false).
uint64_t deleted_size_bytes
Total size of deleted files in bytes.
std::vector< std::string > errors
Error messages from failed operations.
int64_t files_archived
Number of files archived (or would be archived in dry-run).
uint64_t archived_size_bytes
Total size of archived files in bytes.
int64_t files_scanned
Number of files scanned.
uint64_t active_size_bytes
Total size of active files in bytes.
std::vector< std::string > archived_paths
Paths of files that were archived (or would be archived when enable_archival is false or no archive callback is set).
int64_t files_active
Number of files within active retention window.
int64_t files_failed
Number of files that failed archival or deletion.
int64_t files_deleted
Number of files deleted (or would be deleted in dry-run).