Signet Forge 0.1.0
C++20 Parquet library with AI-native extensions
DEMO
Loading...
Searching...
No Matches
lz4.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright 2026 Johnson Ogundeji
3#pragma once
4
28
29#ifdef SIGNET_HAS_LZ4
30
32
33#include <lz4.h>
34
35#include <cstdint>
36#include <limits>
37#include <string>
38#include <vector>
39
40namespace signet::forge {
41
50class Lz4RawCodec : public CompressionCodec {
51public:
59 [[nodiscard]] expected<std::vector<uint8_t>> compress(
60 const uint8_t* data, size_t size) const override {
61
62 if (size == 0) {
63 return std::vector<uint8_t>{};
64 }
65
66 // CWE-190: Integer Overflow (liblz4 uses int for sizes)
67 if (size > static_cast<size_t>((std::numeric_limits<int>::max)())) {
68 return Error{ErrorCode::INTERNAL_ERROR,
69 "LZ4: input exceeds int32 limit"};
70 }
71
72 // LZ4_compressBound returns the maximum compressed size.
73 int max_compressed = LZ4_compressBound(static_cast<int>(size));
74 if (max_compressed <= 0) {
75 return Error{ErrorCode::INTERNAL_ERROR,
76 "LZ4: input too large for LZ4_compressBound"};
77 }
78
79 std::vector<uint8_t> out(static_cast<size_t>(max_compressed));
80
81 int compressed_size = LZ4_compress_default(
82 reinterpret_cast<const char*>(data),
83 reinterpret_cast<char*>(out.data()),
84 static_cast<int>(size),
85 max_compressed);
86
87 if (compressed_size <= 0) {
88 return Error{ErrorCode::INTERNAL_ERROR,
89 "LZ4 compress failed (returned " +
90 std::to_string(compressed_size) + ")"};
91 }
92
93 out.resize(static_cast<size_t>(compressed_size));
94 return out;
95 }
96
105 [[nodiscard]] expected<std::vector<uint8_t>> decompress(
106 const uint8_t* data, size_t size,
107 size_t uncompressed_size) const override {
108
109 static constexpr size_t MAX_DECOMPRESS_SIZE = 256 * 1024 * 1024; // 256 MB
110 if (uncompressed_size > MAX_DECOMPRESS_SIZE)
111 return Error{ErrorCode::INVALID_ARGUMENT, "Decompression size exceeds 256 MB limit"};
112
113 if (uncompressed_size == 0) {
114 return std::vector<uint8_t>{};
115 }
116
117 // CWE-190: Integer Overflow (liblz4 uses int for sizes)
118 if (size > static_cast<size_t>((std::numeric_limits<int>::max)())) {
119 return Error{ErrorCode::INTERNAL_ERROR,
120 "LZ4: compressed input exceeds int32 limit"};
121 }
122 // CWE-190: Integer Overflow (liblz4 uses int for sizes)
123 if (uncompressed_size > static_cast<size_t>((std::numeric_limits<int>::max)())) {
124 return Error{ErrorCode::INTERNAL_ERROR,
125 "LZ4: uncompressed size exceeds int32 limit"};
126 }
127
128 std::vector<uint8_t> out(uncompressed_size);
129
130 int decompressed_size = LZ4_decompress_safe(
131 reinterpret_cast<const char*>(data),
132 reinterpret_cast<char*>(out.data()),
133 static_cast<int>(size),
134 static_cast<int>(uncompressed_size));
135
136 if (decompressed_size < 0) {
137 return Error{ErrorCode::CORRUPT_PAGE,
138 "LZ4 decompress failed (returned " +
139 std::to_string(decompressed_size) + ")"};
140 }
141
142 if (static_cast<size_t>(decompressed_size) != uncompressed_size) {
143 return Error{ErrorCode::CORRUPT_PAGE,
144 "LZ4: decompressed " +
145 std::to_string(decompressed_size) +
146 " bytes but expected " +
147 std::to_string(uncompressed_size)};
148 }
149
150 return out;
151 }
152
155
157 [[nodiscard]] Compression codec_type() const override {
158 return Compression::LZ4_RAW;
159 }
160
162 [[nodiscard]] const char* name() const override {
163 return "lz4_raw";
164 }
165
167};
168
169// ===========================================================================
170// Auto-registration helper
171// ===========================================================================
172
179inline void register_lz4_codec() {
180 CodecRegistry::instance().register_codec(std::make_unique<Lz4RawCodec>());
181}
182
183} // namespace signet::forge
184
185#endif // SIGNET_HAS_LZ4
void register_codec(std::unique_ptr< CompressionCodec > codec)
Register a codec, transferring ownership to the registry.
Definition codec.hpp:105
static CodecRegistry & instance()
Access the process-wide singleton instance.
Definition codec.hpp:94
Compression codec interface and registry for Signet Forge.
Compression
Parquet compression codecs.
Definition types.hpp:115
expected< std::vector< uint8_t > > decompress(Compression codec, const uint8_t *data, size_t size, size_t uncompressed_size)
Decompress data using the specified codec via the global CodecRegistry.
Definition codec.hpp:213
expected< std::vector< uint8_t > > compress(Compression codec, const uint8_t *data, size_t size)
Compress data using the specified codec via the global CodecRegistry.
Definition codec.hpp:183