Signet Forge 0.1.0
C++20 Parquet library with AI-native extensions
DEMO
Loading...
Searching...
No Matches
log_retention.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright 2026 Johnson Ogundeji
3// See LICENSE_COMMERCIAL for full terms.
4#pragma once
5
6#if !defined(SIGNET_ENABLE_COMMERCIAL) || !SIGNET_ENABLE_COMMERCIAL
7#error "signet/ai/log_retention.hpp requires SIGNET_ENABLE_COMMERCIAL=ON (AGPL-3.0 commercial tier). See LICENSE_COMMERCIAL."
8#endif
9
10// ---------------------------------------------------------------------------
11// log_retention.hpp -- Log Retention Lifecycle Manager
12//
13// EU AI Act Art.12(1) requires automatic recording of events (logs) over the
14// lifetime of the system; Art.19 and Art.26(6) require keeping them >= 6 months.
15// MiFID II RTS 24 Art.4 requires 5-year retention for order records.
16//
17// This module provides:
18// - RetentionPolicy: configurable TTL, archival, and size-based rotation
19// - LogRetentionManager: scans log directories and enforces retention
20// - Archival callback for moving expired logs to cold storage
21// - Deletion of logs past the maximum retention period
22// - Compliance summary reporting
23//
24// Header-only. Part of the signet::forge AI module.
25// ---------------------------------------------------------------------------
26
27#include "signet/error.hpp"
28
29#include <algorithm>
30#include <chrono>
31#include <cstdint>
32#include <filesystem>
33#include <functional>
34#include <string>
35#include <vector>
36
37namespace signet::forge {
38
39// ---------------------------------------------------------------------------
40// RetentionPolicy
41// ---------------------------------------------------------------------------
42
// RetentionPolicy fields: retention policy configuration for log lifecycle
// management. All *_ns values are durations in nanoseconds.
// NOTE(review): the `struct RetentionPolicy {` opener is not present in this
// listing -- presumably dropped by the extraction; verify against the header.
//
// Minimum retention period in nanoseconds (15'778'800 s = 182.625 days).
// NOTE(review): not referenced by the enforce()/list_files() code visible here.
49 int64_t min_retention_ns = INT64_C(15778800000000000); // ~6 months
50
// Maximum retention period in nanoseconds (157'788'000 s = 5 * 365.25 days).
// Files older than this are deletion candidates in enforce().
54 int64_t max_retention_ns = INT64_C(157788000000000000); // ~5 years
55
// Archival threshold in nanoseconds (31'557'600 s = 365.25 days). Files older
// than this, but not older than max_retention_ns, are archival candidates.
60 int64_t archive_after_ns = INT64_C(31557600000000000); // ~1 year
61
// Maximum total size of active (non-archived) log files in bytes (10 * 2^30).
// enforce() compares against it, but the branch body is currently empty.
65 uint64_t max_active_size_bytes = UINT64_C(10737418240); // 10 GB
66
// Maximum number of active log files.
// NOTE(review): not referenced by the enforce()/list_files() code visible here.
70 uint64_t max_active_files = 10000;
71
// Suffix a file's path string must end with to be managed. It is compared
// literally with std::string::ends_with; it is not a glob pattern.
75 std::string file_suffix = ".parquet";
76
// If true, enforce() actually deletes files past max_retention_ns; otherwise
// such files are only counted and listed in the summary.
79 bool enable_deletion = false;
80
// If true (and an archive callback is installed), enforce() invokes the
// callback for files past archive_after_ns; otherwise they are only counted.
83 bool enable_archival = false;
84};
85
86// ---------------------------------------------------------------------------
87// RetentionSummary
88// ---------------------------------------------------------------------------
89
// RetentionSummary fields: summary of a retention enforcement pass, filled in
// by LogRetentionManager::enforce().
// NOTE(review): the `struct RetentionSummary {` opener is not present in this
// listing -- presumably dropped by the extraction; verify against the header.
//
// Number of matching files collected by the directory scan.
93 int64_t files_scanned = 0;
94
// Number of files within the active retention window (age <= archive_after_ns).
96 int64_t files_active = 0;
97
// Number of files archived, or that would be archived when archival is
// disabled or no archive callback is installed.
99 int64_t files_archived = 0;
100
// Number of files deleted, or that would be deleted when deletion is disabled.
102 int64_t files_deleted = 0;
103
// Number of files whose deletion failed, whose deletion was vetoed by the
// pre-delete callback, or whose archive callback returned false.
105 int64_t files_failed = 0;
106
// Total size of active files in bytes.
108 uint64_t active_size_bytes = 0;
109
// NOTE(review): `archived_size_bytes` (updated by enforce() and listed in the
// generated docs as "Total size of archived files in bytes") has no
// declaration line in this listing -- presumably dropped by the extraction.
112
// Total size in bytes of files counted in files_deleted.
114 uint64_t deleted_size_bytes = 0;
115
// Whether the pass was a dry-run. enforce() sets this to true only when both
// enable_archival and enable_deletion are false.
117 bool dry_run = true;
118
// Paths of files counted in files_archived.
120 std::vector<std::string> archived_paths;
121
// Paths of files counted in files_deleted.
123 std::vector<std::string> deleted_paths;
124
// Human-readable error messages from failed operations.
126 std::vector<std::string> errors;
127};
128
129// ---------------------------------------------------------------------------
130// LogRetentionManager
131// ---------------------------------------------------------------------------
132
// Public interface of LogRetentionManager (manages log file lifecycle:
// retention, archival, and deletion).
// NOTE(review): the `class LogRetentionManager {` opener and the signature
// lines of the constructor and both setters are not present in this listing --
// presumably dropped by the extraction; verify against the original header.
151public:
// Callback invoked to archive a file; receives the source path and returns
// true on success. enforce() records a false return as a failure.
155 using ArchiveCallback = std::function<bool(const std::string& source_path)>;
156
// Callback invoked before deleting a file; returning false vetoes the deletion.
159 using PreDeleteCallback = std::function<bool(const std::string& path)>;
160
// Constructor body. Per the generated docs the signature is
// `LogRetentionManager(RetentionPolicy policy = {})`. The policy is moved into
// the manager.
162 : policy_(std::move(policy)) {
// Result of the commercial feature check is explicitly discarded.
163 (void)commercial::require_feature("LogRetentionManager");
164 }
165
// Body of `void set_archive_callback(ArchiveCallback cb)`: stores the archival
// callback, replacing any previously installed one.
168 archive_cb_ = std::move(cb);
169 }
170
// Body of `void set_pre_delete_callback(PreDeleteCallback cb)`: stores the
// pre-deletion callback, replacing any previously installed one.
173 pre_delete_cb_ = std::move(cb);
174 }
175
185 [[nodiscard]] inline RetentionSummary enforce(
186 const std::string& log_dir, int64_t now_ns) const {
187 RetentionSummary summary;
188 summary.dry_run = !(policy_.enable_archival || policy_.enable_deletion);
189
190 namespace fs = std::filesystem;
191
192 if (!fs::exists(log_dir) || !fs::is_directory(log_dir)) {
193 summary.errors.push_back("Directory does not exist: " + log_dir);
194 return summary;
195 }
196
197 // Collect matching files with their modification times
198 struct FileInfo {
199 std::string path;
200 int64_t mod_time_ns;
201 uint64_t size_bytes;
202 };
203 std::vector<FileInfo> files;
204
205 for (const auto& entry : fs::directory_iterator(log_dir)) {
206 if (!entry.is_regular_file()) continue;
207
208 const auto& path = entry.path();
209 if (!path.string().ends_with(policy_.file_suffix)) continue;
210
211 std::error_code ec;
212 auto mod_time = fs::last_write_time(path, ec);
213 if (ec) continue;
214
215 // Convert file_time_type to nanoseconds since epoch
216 auto sys_time = std::chrono::file_clock::to_sys(mod_time);
217 auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
218 sys_time.time_since_epoch()).count();
219
220 auto fsize = entry.file_size(ec);
221 if (ec) fsize = 0;
222
223 files.push_back({path.string(), static_cast<int64_t>(ns), fsize});
224 }
225
226 // Sort by modification time (oldest first)
227 std::sort(files.begin(), files.end(),
228 [](const FileInfo& a, const FileInfo& b) {
229 return a.mod_time_ns < b.mod_time_ns;
230 });
231
232 summary.files_scanned = static_cast<int64_t>(files.size());
233
234 for (const auto& fi : files) {
235 int64_t age_ns = now_ns - fi.mod_time_ns;
236
237 if (age_ns > policy_.max_retention_ns) {
238 // Past maximum retention — eligible for deletion
239 if (policy_.enable_deletion) {
240 bool allow = true;
241 if (pre_delete_cb_) {
242 allow = pre_delete_cb_(fi.path);
243 }
244 if (allow) {
245 std::error_code ec;
246 fs::remove(fi.path, ec);
247 if (ec) {
248 summary.errors.push_back("Failed to delete: " + fi.path
249 + " (" + ec.message() + ")");
250 ++summary.files_failed;
251 } else {
252 ++summary.files_deleted;
253 summary.deleted_size_bytes += fi.size_bytes;
254 summary.deleted_paths.push_back(fi.path);
255 }
256 } else {
257 ++summary.files_failed;
258 }
259 } else {
260 // Dry-run
261 ++summary.files_deleted;
262 summary.deleted_size_bytes += fi.size_bytes;
263 summary.deleted_paths.push_back(fi.path);
264 }
265 } else if (age_ns > policy_.archive_after_ns) {
266 // Past archival threshold — eligible for archival
267 if (policy_.enable_archival && archive_cb_) {
268 bool ok = archive_cb_(fi.path);
269 if (ok) {
270 ++summary.files_archived;
271 summary.archived_size_bytes += fi.size_bytes;
272 summary.archived_paths.push_back(fi.path);
273 } else {
274 summary.errors.push_back("Archive callback failed: " + fi.path);
275 ++summary.files_failed;
276 }
277 } else {
278 // Dry-run or no callback
279 ++summary.files_archived;
280 summary.archived_size_bytes += fi.size_bytes;
281 summary.archived_paths.push_back(fi.path);
282 }
283 } else {
284 // Active — within retention window
285 ++summary.files_active;
286 summary.active_size_bytes += fi.size_bytes;
287 }
288 }
289
290 // Check size-based overflow: if active files exceed max, archive oldest
291 if (summary.active_size_bytes > policy_.max_active_size_bytes) {
292 // Already sorted oldest-first — the active files are the ones
293 // not already in archived/deleted lists. This is informational
294 // in the summary for now; the caller can re-invoke with a
295 // shorter archive_after_ns to trigger size-based archival.
296 }
297
298 return summary;
299 }
300
// One entry of the list_files() result: a managed log file with its age and
// size.
306 struct FileStatus {
// Path string of the file as produced by the directory scan.
307 std::string path;
// `now_ns` passed to list_files() minus the file's modification time, in ns.
308 int64_t age_ns;
// File size in bytes; 0 when the size could not be read.
309 uint64_t size_bytes;
// NOTE(review): the `Classification` enum and its `status` member (assigned by
// list_files() and listed in the generated docs) have no line in this
// listing -- presumably dropped by the extraction; verify against the header.
311 };
312
// List all managed log files in a directory with their age classification.
//
// Scans `log_dir` (non-recursively) for regular files whose path string ends
// with `policy_.file_suffix` and returns one FileStatus per file, sorted
// oldest-first. Returns an empty vector when `log_dir` does not exist or is
// not a directory. Nothing on disk is modified.
//
// NOTE(review): fs::exists, fs::is_directory, fs::directory_iterator and
// entry.is_regular_file() are the throwing overloads here, so filesystem
// errors can surface as exceptions from this function.
313 [[nodiscard]] inline std::vector<FileStatus> list_files(
314 const std::string& log_dir, int64_t now_ns) const {
315 std::vector<FileStatus> result;
316 namespace fs = std::filesystem;
317
318 if (!fs::exists(log_dir) || !fs::is_directory(log_dir)) return result;
319
320 for (const auto& entry : fs::directory_iterator(log_dir)) {
321 if (!entry.is_regular_file()) continue;
322 const auto& path = entry.path();
323 if (!path.string().ends_with(policy_.file_suffix)) continue;
324
// Files whose modification time cannot be read are silently left out.
325 std::error_code ec;
326 auto mod_time = fs::last_write_time(path, ec);
327 if (ec) continue;
328
// Convert file_time_type to nanoseconds since the system_clock epoch.
329 auto sys_time = std::chrono::file_clock::to_sys(mod_time);
330 auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
331 sys_time.time_since_epoch()).count();
332
// An unreadable size is reported as 0 rather than dropping the file.
333 auto fsize = entry.file_size(ec);
334 if (ec) fsize = 0;
335
336 int64_t age = now_ns - static_cast<int64_t>(ns);
337
338 FileStatus fs_entry;
339 fs_entry.path = path.string();
340 fs_entry.age_ns = age;
341 fs_entry.size_bytes = fsize;
342
// Same thresholds as enforce(): past max_retention_ns, else past
// archive_after_ns, else active.
// NOTE(review): the assignments for the first two branches are missing from
// this listing (presumably dropped by the extraction); only the ACTIVE
// assignment is visible. Verify the enumerator names against the header.
343 if (age > policy_.max_retention_ns)
345 else if (age > policy_.archive_after_ns)
347 else
348 fs_entry.status = FileStatus::Classification::ACTIVE;
349
350 result.push_back(std::move(fs_entry));
351 }
352
// Descending age_ns puts the oldest file first.
353 // Sort by age (oldest first)
354 std::sort(result.begin(), result.end(),
355 [](const FileStatus& a, const FileStatus& b) {
356 return a.age_ns > b.age_ns;
357 });
358
359 return result;
360 }
361
363 [[nodiscard]] const RetentionPolicy& policy() const noexcept { return policy_; }
364
365private:
// Policy supplied at construction; read by enforce() and list_files().
366 RetentionPolicy policy_;
// Archival callback; empty until set_archive_callback() installs one.
367 ArchiveCallback archive_cb_;
// Pre-deletion callback; empty until set_pre_delete_callback() installs one.
368 PreDeleteCallback pre_delete_cb_;
369};
370
371} // namespace signet::forge
Manages log file lifecycle: retention, archival, and deletion.
void set_pre_delete_callback(PreDeleteCallback cb)
Set the pre-deletion callback.
RetentionSummary enforce(const std::string &log_dir, int64_t now_ns) const
Enforce retention policy on a log directory.
std::function< bool(const std::string &source_path)> ArchiveCallback
Callback invoked to archive a file.
LogRetentionManager(RetentionPolicy policy={})
std::function< bool(const std::string &path)> PreDeleteCallback
Callback invoked before deleting a file.
void set_archive_callback(ArchiveCallback cb)
Set the archival callback.
std::vector< FileStatus > list_files(const std::string &log_dir, int64_t now_ns) const
const RetentionPolicy & policy() const noexcept
Get the current retention policy.
int64_t now_ns()
Return the current time as nanoseconds since the Unix epoch (UTC).
List all managed log files in a directory with their age classification.
enum signet::forge::LogRetentionManager::FileStatus::Classification status
Retention policy configuration for log lifecycle management.
std::string file_suffix
File name suffix to match (a literal suffix compared with ends_with, not a glob pattern).
int64_t max_retention_ns
Maximum retention period in nanoseconds.
uint64_t max_active_files
Maximum number of active log files.
bool enable_deletion
If true, actually delete files past max_retention_ns.
uint64_t max_active_size_bytes
Maximum total size of active (non-archived) log files in bytes.
int64_t archive_after_ns
Archival threshold in nanoseconds.
int64_t min_retention_ns
Minimum retention period in nanoseconds.
bool enable_archival
If true, actually archive files past archive_after_ns.
Summary of a retention enforcement pass.
bool dry_run
Whether the enforcement pass was a dry-run (true only when both enable_archival and enable_deletion are false).
std::vector< std::string > deleted_paths
Paths of files that were deleted (or would be deleted in dry-run).
uint64_t deleted_size_bytes
Total size in bytes of files deleted (or that would be deleted in dry-run).
std::vector< std::string > errors
Error messages from failed operations.
int64_t files_archived
Number of files archived (or would be archived in dry-run).
uint64_t archived_size_bytes
Total size in bytes of files archived (or that would be archived in dry-run).
int64_t files_scanned
Number of files scanned.
uint64_t active_size_bytes
Total size of active files in bytes.
std::vector< std::string > archived_paths
Paths of files that were archived (or would be archived in dry-run).
int64_t files_active
Number of files within active retention window.
int64_t files_failed
Number of files that failed archival or deletion.
int64_t files_deleted
Number of files deleted (or would be deleted in dry-run).